Explicit ghost-cell version of heat code #1

Open · wants to merge 4 commits into base: main
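The 1D task-parallel versions exchange boundary values through Chapel sync variables: writeEF blocks until the variable is empty and then fills it, while readFE blocks until it is full and then empties it, so each ghost value is produced and consumed exactly once per time step. A minimal standalone sketch of that handshake between two tasks (illustrative only, not part of the diff):

    var ghost: sync real;   // starts out empty

    cobegin {
      {
        // producer task: publish a boundary value for the neighbor
        ghost.writeEF(42.0);              // empty -> full
      }
      {
        // consumer task: wait for the value, then leave the variable empty again
        const received = ghost.readFE();  // full -> empty
        writeln("received ghost value: ", received);
      }
    }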
57 changes: 57 additions & 0 deletions heat_fd_solvers/heat_1d.chpl
@@ -0,0 +1,57 @@
import Time.stopwatch;

// ---- set up simulation parameters ------
// declare configurable parameters with default values
config const xLen = 2.0,    // length of the grid in x
             nx = 31,       // number of grid points in x
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

// declare non-configurable parameters
const dx : real = xLen / (nx - 1),      // grid spacing in x
      dt : real = sigma * dx**2 / nu;   // time step size

var t = new stopwatch();

// ---- set up the grid ------
// define a domain and subdomain to describe the grid and its interior
const indices = {0..<nx},
      indicesInner = {1..<nx-1};

// define a 1D array over the above domain
var u : [indices] real;

// set up initial conditions
u = 1.0;
u[(0.5 / dx):int..<(1.0 / dx + 1):int] = 2;

// ---- run the finite difference computation ------
// create a temporary copy of 'u' to store the previous time step
var un = u;

// start timing
t.start();

// iterate for 'nt' time steps
for 1..nt {

  // swap the arrays to prepare for the next time step
  u <=> un;

  // update the solution over the interior of the domain in parallel
  forall i in indicesInner {
    u[i] = un[i] + nu * dt / dx**2 *
           (un[i-1] - 2 * un[i] + un[i+1]);
  }
}

// stop timing
t.stop();

// ---- print final results ------
const mean = (+ reduce u) / u.size,
      stdDev = sqrt((+ reduce (u - mean)**2) / u.size);

writeln("mean: ", mean, " stdDev: ", stdDev);
writeln("elapsed time: ", t.elapsed(), " seconds");
86 changes: 86 additions & 0 deletions heat_fd_solvers/heat_1d_explicit.chpl
@@ -0,0 +1,86 @@
import Time.stopwatch;

// ---- set up simulation parameters ------
// declare configurable parameters with default values
config const xLen = 2.0,    // length of the grid in x
             nx = 31,       // number of grid points in x
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

// declare non-configurable parameters
const dx : real = xLen / (nx - 1),      // grid spacing in x
      dt : real = sigma * dx**2 / nu,   // time step size
      nTasks = here.maxTaskPar,         // number of tasks
      npt = nx / nTasks;                // number of grid points per task

param LEFT = 0, RIGHT = 1;

var t = new stopwatch();

// ---- set up the grid ------
// define a domain to describe the grid
const indices = {0..<nx};

// define a 1D array over the above domain
var u : [indices] real;

// ---- set up ghost cells ------
var ghosts : [0..1, 0..<nTasks] sync real;

// set up initial conditions
u = 1.0;
u[(0.5 / dx):int..<(1.0 / dx + 1):int] = 2;

proc work(tid: int) {
  // define the region of the global array owned by this task
  const lo = tid * npt,
        hi = if tid == nTasks-1 then nx else (tid + 1) * npt;

  // interior indices updated by this task, plus one halo cell on each side
  const taskIndicesInner = {max(lo, 1)..<min(hi, nx - 1)},
        taskIndices = taskIndicesInner.expand(1);

  // declare local arrays and load values from the global array
  var uLocal1, uLocal2: [taskIndices] real;
  uLocal1 = u[taskIndices];

  // write initial conditions to ghosts
  if tid != 0 then ghosts[RIGHT, tid-1].writeEF(uLocal1[taskIndicesInner.low]);
  if tid != nTasks-1 then ghosts[LEFT, tid+1].writeEF(uLocal1[taskIndicesInner.high]);

  for 1..nt {
    // load values from ghost cells into local array's borders
    if tid != 0 then uLocal1[taskIndices.low] = ghosts[LEFT, tid].readFE();
    if tid != nTasks-1 then uLocal1[taskIndices.high] = ghosts[RIGHT, tid].readFE();

    // run kernel computation
    foreach i in taskIndicesInner do
      uLocal2[i] = uLocal1[i] + nu * dt / dx**2 *
                   (uLocal1[i-1] - 2 * uLocal1[i] + uLocal1[i+1]);

    // write results to ghost cells
    if tid != 0 then ghosts[RIGHT, tid-1].writeEF(uLocal2[taskIndicesInner.low]);
    if tid != nTasks-1 then ghosts[LEFT, tid+1].writeEF(uLocal2[taskIndicesInner.high]);

    uLocal1 <=> uLocal2;
  }

  // store this task's interior results in the global array
  u[taskIndicesInner] = uLocal1[taskIndicesInner];
}

// start timing
t.start();

// run the simulation across tasks
coforall tid in 0..<nTasks do work(tid);

// stop timing
t.stop();

// ---- print final results ------
const mean = (+ reduce u) / u.size,
      stdDev = sqrt((+ reduce (u - mean)**2) / u.size);

writeln("mean: ", mean, " stdDev: ", stdDev);
writeln("elapsed time: ", t.elapsed(), " seconds");
102 changes: 102 additions & 0 deletions heat_fd_solvers/heat_1d_explicit_dist.chpl
@@ -0,0 +1,102 @@
import Time.stopwatch;
import BlockDist.Block;
use CommDiagnostics;

// ---- set up simulation parameters ------
// declare configurable parameters with default values
config const xLen = 2.0,    // length of the grid in x
             nx = 31,       // number of grid points in x
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

// declare non-configurable parameters
const dx : real = xLen / (nx - 1),          // grid spacing in x
      dt : real = sigma * dx**2 / nu,       // time step size
      nTasksPerLoc = here.maxTaskPar,       // number of tasks per locale
      nTasks = Locales.size * nTasksPerLoc, // total number of tasks
      npt = nx / nTasks;                    // number of grid points per task

param LEFT = 0, RIGHT = 1;

var t = new stopwatch();

// ---- set up the grid ------
// define a domain and distribution to describe the grid
const indices = {0..<nx},
      INDICES: domain(1) dmapped Block(indices) = indices;

// define a distributed 1D array over the above domain
var u : [INDICES] real;

// ---- set up ghost cells ------
const gdom = {0..<Locales.size},
      GDOM: domain(1) dmapped Block(gdom) = gdom;
var ghosts: [GDOM] [0..1, 0..<nTasksPerLoc] sync real;

// set up initial conditions
u = 1.0;
u[(0.5 / dx):int..<(1.0 / dx + 1):int] = 2;

proc work(locId: int, tid: int) {
  // define the region of the global array owned by this task
  const globalIdx = locId * nTasksPerLoc + tid,
        lo = globalIdx * npt,
        hi = if globalIdx == nTasks-1 then nx else (globalIdx + 1) * npt;

  // interior indices updated by this task, plus one halo cell on each side
  const taskIndicesInner = {max(lo, 1)..<min(hi, nx - 1)},
        taskIndices = taskIndicesInner.expand(1);

  // locale and task indices addressing the neighboring tasks' ghost cells
  const lM1 = if tid == 0 then locId - 1 else locId,
        lP1 = if tid == nTasksPerLoc-1 then locId + 1 else locId,
        tM1 = if tid == 0 then nTasksPerLoc-1 else tid-1,
        tP1 = if tid == nTasksPerLoc-1 then 0 else tid+1;

  // declare local arrays and load values from the global array
  var uLocal1, uLocal2: [taskIndices] real;
  uLocal1 = u[taskIndices];

  // write initial conditions to ghosts
  if globalIdx != 0 then ghosts[lM1][RIGHT, tM1].writeEF(uLocal1[taskIndicesInner.low]);
  if globalIdx != nTasks-1 then ghosts[lP1][LEFT, tP1].writeEF(uLocal1[taskIndicesInner.high]);

  for 1..nt {
    // load values from ghost cells into local array's borders
    if globalIdx != 0 then uLocal1[taskIndices.low] = ghosts[locId][LEFT, tid].readFE();
    if globalIdx != nTasks-1 then uLocal1[taskIndices.high] = ghosts[locId][RIGHT, tid].readFE();

    // run kernel computation
    foreach i in taskIndicesInner do
      uLocal2[i] = uLocal1[i] + nu * dt / dx**2 *
                   (uLocal1[i-1] - 2 * uLocal1[i] + uLocal1[i+1]);

    // write results to ghost cells
    if globalIdx != 0 then ghosts[lM1][RIGHT, tM1].writeEF(uLocal2[taskIndicesInner.low]);
    if globalIdx != nTasks-1 then ghosts[lP1][LEFT, tP1].writeEF(uLocal2[taskIndicesInner.high]);

    uLocal1 <=> uLocal2;
  }

  // store this task's interior results in the global array
  u[taskIndicesInner] = uLocal1[taskIndicesInner];
}

// start timing and comm diagnostics
t.start();
startVerboseComm();

// run the simulation across tasks
coforall (loc, lid) in zip(Locales, 0..) do on loc {
  coforall tid in 0..<nTasksPerLoc do work(lid, tid);
}

// stop timing and comm diagnostics
stopVerboseComm();
t.stop();

// ---- print final results ------
const mean = (+ reduce u) / u.size,
      stdDev = sqrt((+ reduce (u - mean)**2) / u.size);

writeln("mean: ", mean, " stdDev: ", stdDev);
writeln("elapsed time: ", t.elapsed(), " seconds");
132 changes: 132 additions & 0 deletions heat_fd_solvers/heat_2d_explicit_dist.chpl
@@ -0,0 +1,132 @@
import Time.stopwatch,
       BlockDist.Block,
       Collectives.barrier;

use CommDiagnostics;
config param runCommDiag = false;

config const xLen = 2.0,    // length of the grid in x
             yLen = 2.0,    // length of the grid in y
             nx = 31,       // number of grid points in x
             ny = 31,       // number of grid points in y
             nt = 50,       // number of time steps
             sigma = 0.25,  // stability parameter
             nu = 0.05;     // viscosity

const dx : real = xLen / (nx - 1),        // grid spacing in x
      dy : real = yLen / (ny - 1),        // grid spacing in y
      dt : real = sigma * dx * dy / nu;   // time step size

// define block distributed array
const indices = {0..<nx, 0..<ny},
      indicesInner = indices.expand(-1),
      INDICES = indices dmapped Block(indicesInner);
var u: [INDICES] real;

// apply initial conditions
u = 1.0;
u[
  (0.5 / dx):int..<(1.0 / dx + 1):int,
  (0.5 / dy):int..<(1.0 / dy + 1):int
] = 2;

// set up an array of ghost vectors, indexed by 'u's grid of target locales
var ghostVecs: [u.targetLocales().domain] [0..<4] GhostVec;

param L = 0, R = 1, T = 2, B = 3;
const tidXMax = u.targetLocales().dim(0).high,
      tidYMax = u.targetLocales().dim(1).high;

// set up barrier and timer
var b = new barrier(numLocales),
    t = new stopwatch();
t.start();

if runCommDiag then startVerboseComm();

// execute the FD computation with one task per locale
coforall (loc, (i, j)) in zip(u.targetLocales(), u.targetLocales().domain) do on loc {
  // initialize ghost vectors
  for param edge in 0..<4 {
    param xy = if edge < 2 then 1 else 0;
    ghostVecs[i, j][edge] = new GhostVec(u.localSubdomain().dim(xy).expand(1));
  }

  // synchronize across tasks
  b.barrier();

  // run the portion of the FD computation owned by this task
  work(i, j);
}

if runCommDiag then stopVerboseComm();

t.stop();
writeln("elapsed time: ", t.elapsed(), " seconds");

proc work(tidX: int, tidY: int) {
  // declare two local sub-arrays with room to store neighboring locales' edges
  const localIndices = u.localSubdomain(here),
        localIndicesBuffered = localIndices.expand(1);

  var uLocal1, uLocal2: [localIndicesBuffered] real;

  // populate first local array with initial conditions from global array
  uLocal1[localIndices] = u[localIndices];

  // convenient constants for indexing into edges of local array
  const LL = localIndicesBuffered.dim(0).low,
        RR = localIndicesBuffered.dim(0).high,
        BB = localIndicesBuffered.dim(1).low,
        TT = localIndicesBuffered.dim(1).high;

  // preliminarily populate ghost regions for neighboring locales
  if tidX > 0       then ghostVecs[tidX-1, tidY][R].v = uLocal1[LL+1, ..];
  if tidX < tidXMax then ghostVecs[tidX+1, tidY][L].v = uLocal1[RR-1, ..];
  if tidY > 0       then ghostVecs[tidX, tidY-1][T].v = uLocal1[.., BB+1];
  if tidY < tidYMax then ghostVecs[tidX, tidY+1][B].v = uLocal1[.., TT-1];

  b.barrier();

  // run FD computation
  for 1..nt {
    // populate local edges from ghost regions
    if tidX > 0       then uLocal1[LL, ..] = ghostVecs[tidX, tidY][L].v;
    if tidX < tidXMax then uLocal1[RR, ..] = ghostVecs[tidX, tidY][R].v;
    if tidY > 0       then uLocal1[.., BB] = ghostVecs[tidX, tidY][B].v;
    if tidY < tidYMax then uLocal1[.., TT] = ghostVecs[tidX, tidY][T].v;

    // make sure all tasks have read their ghost regions before they are overwritten
    b.barrier();

    // compute the FD kernel in parallel
    forall (i, j) in localIndices {
      uLocal2[i, j] = uLocal1[i, j] +
              nu * dt / dx**2 *
                (uLocal1[i-1, j] - 2 * uLocal1[i, j] + uLocal1[i+1, j]) +
              nu * dt / dy**2 *
                (uLocal1[i, j-1] - 2 * uLocal1[i, j] + uLocal1[i, j+1]);
    }

    // populate ghost regions for neighboring locales
    if tidX > 0       then ghostVecs[tidX-1, tidY][R].v = uLocal2[LL+1, ..];
    if tidX < tidXMax then ghostVecs[tidX+1, tidY][L].v = uLocal2[RR-1, ..];
    if tidY > 0       then ghostVecs[tidX, tidY-1][T].v = uLocal2[.., BB+1];
    if tidY < tidYMax then ghostVecs[tidX, tidY+1][B].v = uLocal2[.., TT-1];

    // synchronize with other tasks
    b.barrier();

    // swap arrays
    uLocal1 <=> uLocal2;
  }

  // store results back in global array
  u[localIndices] = uLocal1[localIndices];
}

// a record wrapping a resizable 1D vector, used as a ghost-cell buffer
record GhostVec {
  var d: domain(1);
  var v: [d] real;

  proc init() do this.d = {0..0};
  proc init(r: range(int, boundKind.both, strideKind.one)) do
    this.d = {r};
}
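Unlike the 1D codes, which pair producers and consumers through sync variables, the 2D code coordinates whole exchange phases with a reusable barrier from the Collectives module. A standalone sketch of that pattern (the task count is arbitrary):

    import Collectives.barrier;

    const n = 4;
    var b = new barrier(n);

    coforall tid in 0..<n {
      writeln("task ", tid, ": before the barrier");
      b.barrier();                // no task proceeds until all n have arrived
      writeln("task ", tid, ": after the barrier");
    }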