Explicit ghost-cell version of heat code #1

Open · wants to merge 4 commits into base: main
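The 1D task-parallel versions exchange boundary values through Chapel sync variables: writeEF blocks until the variable is empty and then fills it, while readFE blocks until it is full and then empties it, so each ghost value is produced and consumed exactly once per time step. A minimal standalone sketch of that handshake between two tasks (illustrative only, not part of the diff):

    var ghost: sync real;   // starts out empty

    cobegin {
      {
        // producer task: publish a boundary value for the neighbor
        ghost.writeEF(42.0);              // empty -> full
      }
      {
        // consumer task: wait for the value, then leave the variable empty again
        const received = ghost.readFE();  // full -> empty
        writeln("received ghost value: ", received);
      }
    }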
57 changes: 57 additions & 0 deletions heat_fd_solvers/heat_1d.chpl
@@ -0,0 +1,57 @@
import Time.stopwatch;

// ---- set up simulation parameters ------
// declare configurable parameters with default values
config const xLen = 2.0,    // length of the grid in x
             nx = 31,       // number of grid points in x
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

// declare non-configurable parameters
const dx : real = xLen / (nx - 1),      // grid spacing in x
      dt : real = sigma * dx**2 / nu;   // time step size

var t = new stopwatch();

// ---- set up the grid ------
// define a domain and subdomain to describe the grid and its interior
const indices = {0..<nx},
      indicesInner = {1..<nx-1};

// define a 1D array over the above domain
var u : [indices] real;

// set up initial conditions
u = 1.0;
u[(0.5 / dx):int..<(1.0 / dx + 1):int] = 2;

// ---- run the finite difference computation ------
// create a temporary copy of 'u' to store the previous time step
var un = u;

// start timing
t.start();

// iterate for 'nt' time steps
for 1..nt {

  // swap the arrays to prepare for the next time step
  u <=> un;

  // update the solution over the interior of the domain in parallel
  forall i in indicesInner {
    u[i] = un[i] + nu * dt / dx**2 *
           (un[i-1] - 2 * un[i] + un[i+1]);
  }
}

// stop timing
t.stop();

// ---- print final results ------
const mean = (+ reduce u) / u.size,
      stdDev = sqrt((+ reduce (u - mean)**2) / u.size);

writeln("mean: ", mean, " stdDev: ", stdDev);
writeln("elapsed time: ", t.elapsed(), " seconds");
86 changes: 86 additions & 0 deletions heat_fd_solvers/heat_1d_explicit.chpl
@@ -0,0 +1,86 @@
import Time.stopwatch;

// ---- set up simulation parameters ------
// declare configurable parameters with default values
config const xLen = 2.0,    // length of the grid in x
             nx = 31,       // number of grid points in x
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

// declare non-configurable parameters
const dx : real = xLen / (nx - 1),      // grid spacing in x
      dt : real = sigma * dx**2 / nu,   // time step size
      nTasks = here.maxTaskPar,         // number of tasks
      npt = nx / nTasks;                // number of grid points per task

param LEFT = 0, RIGHT = 1;

var t = new stopwatch();

// ---- set up the grid ------
// define a domain to describe the grid
const indices = {0..<nx};

// define a 1D array over the above domain
var u : [indices] real;

// ---- set up ghost cells ------
var ghosts : [0..1, 0..<nTasks] sync real;

// set up initial conditions
u = 1.0;
u[(0.5 / dx):int..<(1.0 / dx + 1):int] = 2;

proc work(tid: int) {
  // define the region of the global array owned by this task
  const lo = tid * npt,
        hi = if tid == nTasks-1 then nx else (tid + 1) * npt;

  // interior indices updated by this task, plus one halo cell on each side
  const taskIndicesInner = {max(lo, 1)..<min(hi, nx - 1)},
        taskIndices = taskIndicesInner.expand(1);

  // declare local arrays and load values from the global array
  var uLocal1, uLocal2: [taskIndices] real;
  uLocal1 = u[taskIndices];

  // write initial conditions to ghosts
  if tid != 0 then ghosts[RIGHT, tid-1].writeEF(uLocal1[taskIndicesInner.low]);
  if tid != nTasks-1 then ghosts[LEFT, tid+1].writeEF(uLocal1[taskIndicesInner.high]);

  for 1..nt {
    // load values from ghost cells into local array's borders
    if tid != 0 then uLocal1[taskIndices.low] = ghosts[LEFT, tid].readFE();
    if tid != nTasks-1 then uLocal1[taskIndices.high] = ghosts[RIGHT, tid].readFE();

    // run kernel computation
    foreach i in taskIndicesInner do
      uLocal2[i] = uLocal1[i] + nu * dt / dx**2 *
                   (uLocal1[i-1] - 2 * uLocal1[i] + uLocal1[i+1]);

    // write results to ghost cells
    if tid != 0 then ghosts[RIGHT, tid-1].writeEF(uLocal2[taskIndicesInner.low]);
    if tid != nTasks-1 then ghosts[LEFT, tid+1].writeEF(uLocal2[taskIndicesInner.high]);

    uLocal1 <=> uLocal2;
  }

  // store this task's interior results in the global array
  u[taskIndicesInner] = uLocal1[taskIndicesInner];
}

// start timing
t.start();

// run the simulation across tasks
coforall tid in 0..<nTasks do work(tid);

// stop timing
t.stop();

// ---- print final results ------
const mean = (+ reduce u) / u.size,
      stdDev = sqrt((+ reduce (u - mean)**2) / u.size);

writeln("mean: ", mean, " stdDev: ", stdDev);
writeln("elapsed time: ", t.elapsed(), " seconds");
102 changes: 102 additions & 0 deletions heat_fd_solvers/heat_1d_explicit_dist.chpl
@@ -0,0 +1,102 @@
import Time.stopwatch;
import BlockDist.Block;
use CommDiagnostics;

// ---- set up simulation parameters ------
// declare configurable parameters with default values
config const xLen = 2.0,    // length of the grid in x
             nx = 31,       // number of grid points in x
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

// declare non-configurable parameters
const dx : real = xLen / (nx - 1),          // grid spacing in x
      dt : real = sigma * dx**2 / nu,       // time step size
      nTasksPerLoc = here.maxTaskPar,       // number of tasks per locale
      nTasks = Locales.size * nTasksPerLoc, // total number of tasks
      npt = nx / nTasks;                    // number of grid points per task

param LEFT = 0, RIGHT = 1;

var t = new stopwatch();

// ---- set up the grid ------
// define a domain and distribution to describe the grid
const indices = {0..<nx},
      INDICES: domain(1) dmapped Block(indices) = indices;

// define a distributed 1D array over the above domain
var u : [INDICES] real;

// ---- set up ghost cells ------
const gdom = {0..<Locales.size},
      GDOM: domain(1) dmapped Block(gdom) = gdom;
var ghosts: [GDOM] [0..1, 0..<nTasksPerLoc] sync real;

// set up initial conditions
u = 1.0;
u[(0.5 / dx):int..<(1.0 / dx + 1):int] = 2;

proc work(locId: int, tid: int) {
  // define the region of the global array owned by this task
  const globalIdx = locId * nTasksPerLoc + tid,
        lo = globalIdx * npt,
        hi = if globalIdx == nTasks-1 then nx else (globalIdx + 1) * npt;

  // interior indices updated by this task, plus one halo cell on each side
  const taskIndicesInner = {max(lo, 1)..<min(hi, nx - 1)},
        taskIndices = taskIndicesInner.expand(1);

  // locale and task indices addressing the neighboring tasks' ghost cells
  const lM1 = if tid == 0 then locId - 1 else locId,
        lP1 = if tid == nTasksPerLoc-1 then locId + 1 else locId,
        tM1 = if tid == 0 then nTasksPerLoc-1 else tid-1,
        tP1 = if tid == nTasksPerLoc-1 then 0 else tid+1;

  // declare local arrays and load values from the global array
  var uLocal1, uLocal2: [taskIndices] real;
  uLocal1 = u[taskIndices];

  // write initial conditions to ghosts
  if globalIdx != 0 then ghosts[lM1][RIGHT, tM1].writeEF(uLocal1[taskIndicesInner.low]);
  if globalIdx != nTasks-1 then ghosts[lP1][LEFT, tP1].writeEF(uLocal1[taskIndicesInner.high]);

  for 1..nt {
    // load values from ghost cells into local array's borders
    if globalIdx != 0 then uLocal1[taskIndices.low] = ghosts[locId][LEFT, tid].readFE();
    if globalIdx != nTasks-1 then uLocal1[taskIndices.high] = ghosts[locId][RIGHT, tid].readFE();

    // run kernel computation
    foreach i in taskIndicesInner do
      uLocal2[i] = uLocal1[i] + nu * dt / dx**2 *
                   (uLocal1[i-1] - 2 * uLocal1[i] + uLocal1[i+1]);

    // write results to ghost cells
    if globalIdx != 0 then ghosts[lM1][RIGHT, tM1].writeEF(uLocal2[taskIndicesInner.low]);
    if globalIdx != nTasks-1 then ghosts[lP1][LEFT, tP1].writeEF(uLocal2[taskIndicesInner.high]);

    uLocal1 <=> uLocal2;
  }

  // store this task's interior results in the global array
  u[taskIndicesInner] = uLocal1[taskIndicesInner];
}

// start timing and comm diagnostics
t.start();
startVerboseComm();

// run the simulation across tasks
coforall (loc, lid) in zip(Locales, 0..) do on loc {
  coforall tid in 0..<nTasksPerLoc do work(lid, tid);
}

// stop timing and comm diagnostics
stopVerboseComm();
t.stop();

// ---- print final results ------
const mean = (+ reduce u) / u.size,
      stdDev = sqrt((+ reduce (u - mean)**2) / u.size);

writeln("mean: ", mean, " stdDev: ", stdDev);
writeln("elapsed time: ", t.elapsed(), " seconds");
132 changes: 132 additions & 0 deletions heat_fd_solvers/heat_2d_explicit_dist.chpl
@@ -0,0 +1,132 @@
import Time.stopwatch,
       BlockDist.Block,
       Collectives.barrier;

use CommDiagnostics;
config param runCommDiag = false;

config const xLen = 2.0,    // length of the grid in x
             yLen = 2.0,    // length of the grid in y
             nx = 31,       // number of grid points in x
             ny = 31,       // number of grid points in y
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

const dx : real = xLen / (nx - 1),        // grid spacing in x
      dy : real = yLen / (ny - 1),        // grid spacing in y
      dt : real = sigma * dx * dy / nu;   // time step size

// define block distributed array
const indices = {0..<nx, 0..<ny},
      indicesInner = indices.expand(-1),
      INDICES = indices dmapped Block(indicesInner);
var u: [INDICES] real;

// apply initial conditions
u = 1.0;
u[
  (0.5 / dx):int..<(1.0 / dx + 1):int,
  (0.5 / dy):int..<(1.0 / dy + 1):int
] = 2;

// set up an array of ghost vectors, indexed by 'u's grid of target locales
var ghostVecs: [u.targetLocales().domain] [0..<4] GhostVec;

param L = 0, R = 1, T = 2, B = 3;
const tidXMax = u.targetLocales().dim(0).high,
      tidYMax = u.targetLocales().dim(1).high;

// set up barrier and timer
var b = new barrier(numLocales),
    t = new stopwatch();
t.start();

if runCommDiag then startVerboseComm();

// execute the FD computation with one task per locale
coforall (loc, (i, j)) in zip(u.targetLocales(), u.targetLocales().domain) do on loc {
  // initialize ghost vectors
  for param edge in 0..<4 {
    param xy = if edge < 2 then 1 else 0;
    ghostVecs[i, j][edge] = new GhostVec(u.localSubdomain().dim(xy).expand(1));
  }

  // synchronize across tasks
  b.barrier();

  // run the portion of the FD computation owned by this task
  work(i, j);
}

if runCommDiag then stopVerboseComm();

t.stop();
writeln("elapsed time: ", t.elapsed(), " seconds");

proc work(tidX: int, tidY: int) {
  // declare two local sub-arrays with room to store neighboring locales' edges
  const localIndices = u.localSubdomain(here),
        localIndicesBuffered = localIndices.expand(1);

  var uLocal1, uLocal2: [localIndicesBuffered] real;

  // populate first local array with initial conditions from global array
  uLocal1[localIndices] = u[localIndices];

  // convenient constants for indexing into edges of local array
  const LL = localIndicesBuffered.dim(0).low,
        RR = localIndicesBuffered.dim(0).high,
        BB = localIndicesBuffered.dim(1).low,
        TT = localIndicesBuffered.dim(1).high;

  // preliminarily populate ghost regions for neighboring locales
  if tidX > 0       then ghostVecs[tidX-1, tidY][R].v = uLocal1[LL+1, ..];
  if tidX < tidXMax then ghostVecs[tidX+1, tidY][L].v = uLocal1[RR-1, ..];
  if tidY > 0       then ghostVecs[tidX, tidY-1][T].v = uLocal1[.., BB+1];
  if tidY < tidYMax then ghostVecs[tidX, tidY+1][B].v = uLocal1[.., TT-1];

  b.barrier();

  // run FD computation
  for 1..nt {
    // populate local edges from ghost regions
    if tidX > 0       then uLocal1[LL, ..] = ghostVecs[tidX, tidY][L].v;
    if tidX < tidXMax then uLocal1[RR, ..] = ghostVecs[tidX, tidY][R].v;
    if tidY > 0       then uLocal1[.., BB] = ghostVecs[tidX, tidY][B].v;
    if tidY < tidYMax then uLocal1[.., TT] = ghostVecs[tidX, tidY][T].v;

    // make sure all tasks have read their ghost regions before they are overwritten
    b.barrier();

    // compute the FD kernel in parallel
    forall (i, j) in localIndices {
      uLocal2[i, j] = uLocal1[i, j] +
              nu * dt / dx**2 *
                (uLocal1[i-1, j] - 2 * uLocal1[i, j] + uLocal1[i+1, j]) +
              nu * dt / dy**2 *
                (uLocal1[i, j-1] - 2 * uLocal1[i, j] + uLocal1[i, j+1]);
    }

    // populate ghost regions for neighboring locales
    if tidX > 0       then ghostVecs[tidX-1, tidY][R].v = uLocal2[LL+1, ..];
    if tidX < tidXMax then ghostVecs[tidX+1, tidY][L].v = uLocal2[RR-1, ..];
    if tidY > 0       then ghostVecs[tidX, tidY-1][T].v = uLocal2[.., BB+1];
    if tidY < tidYMax then ghostVecs[tidX, tidY+1][B].v = uLocal2[.., TT-1];

    // synchronize with other tasks
    b.barrier();

    // swap arrays
    uLocal1 <=> uLocal2;
  }

  // store results back in global array
  u[localIndices] = uLocal1[localIndices];
}

// a record wrapping a resizable 1D vector, used as a ghost-cell buffer
record GhostVec {
  var d: domain(1);
  var v: [d] real;

  proc init() do this.d = {0..0};
  proc init(r: range(int, boundKind.both, strideKind.one)) do
    this.d = {r};
}
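Unlike the 1D codes, which pair producers and consumers through sync variables, the 2D code coordinates whole exchange phases with a reusable barrier from the Collectives module. A standalone sketch of that pattern (the task count is arbitrary):

    import Collectives.barrier;

    const n = 4;
    var b = new barrier(n);

    coforall tid in 0..<n {
      writeln("task ", tid, ": before the barrier");
      b.barrier();                // no task proceeds until all n have arrived
      writeln("task ", tid, ": after the barrier");
    }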