-
Notifications
You must be signed in to change notification settings - Fork 2
/
fastscape_RB+PQ.cpp
548 lines (449 loc) · 21.6 KB
/
fastscape_RB+PQ.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
#include <array>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <fenv.h> //Used to catch floating point NaN issues
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <omp.h> //Used for OpenMP run-time functions
#include "random.hpp"
#include <vector>
#include "CumulativeTimer.hpp"
//Used to handle situations in which OpenMP is not available
//(This scenario has not been extensively tested)
#ifndef _OPENMP
#define omp_get_thread_num() 0
#define omp_get_num_threads() 1
#define omp_get_max_threads() 1
#endif
///This is a quick-and-dirty, zero-dependency function for saving the outputs of
///the model in ArcGIS ASCII DEM format (aka Arc/Info ASCII Grid, AAIGrid).
///Production code for experimentation should probably use GeoTIFF or a similar
///format as it will have a smaller file size and, thus, save quicker.
void PrintDEM(
const std::string filename,
const std::vector<double>& h,
const int width,
const int height
){
std::ofstream fout(filename.c_str());
//Since the outer ring of the dataset is a halo used for simplifying
//neighbour-finding logic, we do not save it to the output here.
fout<<"ncols "<<(width- 2)<<"\n";
fout<<"nrows "<<(height-2)<<"\n";
fout<<"xllcorner 637500.000\n"; //Arbitrarily chosen value
fout<<"yllcorner 206000.000\n"; //Arbitrarily chosen value
fout<<"cellsize 500.000\n"; //Arbitrarily chosen value
fout<<"NODATA_value -9999\n"; //Value which is guaranteed not to correspond to an actual data value
for(int y=1;y<height-1;y++){
for(int x=1;x<width-1;x++)
fout<<h[y*width+x]<<" ";
fout<<"\n";
}
}
///The entire model is contained in a handy class, which makes it easy to set up
///and solve many such models.
class FastScape_RBPQ {
private:
//Value used to indicate that a cell had no downhill neighbour and, thus, does
//not flow anywhere.
const int NO_FLOW = -1;
const double SQRT2 = 1.414213562373095048801688724209698078569671875376948; //Yup, this is overkill.
public:
//NOTE: Having these constants specified in the class rather than globally
//results in a significant speed loss. However, it is better to have them here
//under the assumption that they'd be dynamic in a real implementation.
const double keq = 2e-6; //Stream power equation constant (coefficient)
const double neq = 2; //Stream power equation constant (slope modifier)
const double meq = 0.8; //Stream power equation constant (area modifier)
const double ueq = 2e-3; //Rate of uplift
const double dt = 1000.; //Timestep interval
const double dr[8] = {1,SQRT2,1,SQRT2,1,SQRT2,1,SQRT2}; //Distance between adjacent cell centers on a rectangular grid arbitrarily scale to cell edge lengths of 1
const double tol = 1e-3; //Tolerance for Newton-Rhapson convergence while solving implicit Euler
const double cell_area = 40000; //Area of a single cell
private:
int width; //Width of DEM
int height; //Height of DEM
int size; //Size of DEM (width*height)
//The dataset is set up so that the outermost edge is never actually used:
//it's a halo which allows every cell that is actually processed to consider
//its neighbours in all 8 directions without having to check first to see if
//it is an edge cell. The ring of second-most outer cells is set to a fixed
//value to which everything erodes (in this model).
//Rec directions (also used for nshift offsets) - see below for details
//1 2 3
//0 4
//7 6 5
std::vector<double> h; //Digital elevation model (height)
std::vector<double> accum; //Flow accumulation at each point
std::vector<int> rec; //Direction of receiving cell
std::vector<int> donor; //Indices of a cell's donor cells
std::vector<int> ndon; //How many donors a cell has
std::array<int,8> nshift; //Offset from a focal cell's index to its neighbours in terms of flat indexing
int stack_width; //Number of cells allowed in the stack
int level_width; //Number of cells allowed in a level
//Timers for keeping track of how long each part of the code takes
CumulativeTimer Tmr_Step1_Initialize;
CumulativeTimer Tmr_Step2_DetermineReceivers;
CumulativeTimer Tmr_Step3_DetermineDonors;
CumulativeTimer Tmr_Step4_GenerateOrder;
CumulativeTimer Tmr_Step5_FlowAcc;
CumulativeTimer Tmr_Step6_Uplift;
CumulativeTimer Tmr_Step7_Erosion;
CumulativeTimer Tmr_Overall;
private:
void GenerateRandomTerrain(){
//srand(std::random_device()());
for(int y=0;y<height;y++)
for(int x=0;x<width;x++){
const int c = y*width+x;
h[c] = uniform_rand_real(0,1);
//Outer edge is set to 0 and never touched again. It is used only as a
//convenience so we don't have to worry when a focal cell looks at its
//neighbours.
if(x == 0 || y==0 || x==width-1 || y==height-1)
h[c] = 0;
//Second outer-most edge is set to 0 and never touched again. This is the
//baseline to which all cells would erode where it not for uplift. You can
//think of this as being "sea level".
if(x == 1 || y==1 || x==width-2 || y==height-2)
h[c] = 0;
}
}
public:
///Initializing code
FastScape_RBPQ(const int width0, const int height0)
//Initialize code for finding neighbours of a cell
: nshift{-1,-width0-1,-width0,-width0+1,1,width0+1,width0,width0-1}
{
Tmr_Overall.start();
Tmr_Step1_Initialize.start();
width = width0;
height = height0;
size = width*height;
h.resize(size); //Memory for terrain height
GenerateRandomTerrain(); //Could replace this with custom initializer
Tmr_Step1_Initialize.stop();
Tmr_Overall.stop();
}
private:
///The receiver of a focal cell is the cell which receives the focal cells'
///flow. Here, we model the receiving cell as being the one connected to the
///focal cell by the steppest gradient. If there is no local gradient, than
///the special value NO_FLOW is assigned.
void ComputeReceivers(){
//Edge cells do not have receivers because they do not distribute their flow
//to anywhere.
#pragma omp for collapse(2)
for(int y=2;y<height-2;y++)
for(int x=2;x<width-2;x++){
const int c = y*width+x;
//The slope must be greater than zero for there to be downhill flow;
//otherwise, the cell is marked NO_FLOW.
double max_slope = 0; //Maximum slope seen so far amongst neighbours
int max_n = NO_FLOW; //Direction of neighbour which had maximum slope to focal cell
//Loop over neighbours
for(int n=0;n<8;n++){
const double slope = (h[c] - h[c+nshift[n]])/dr[n]; //Slope to neighbour n
if(slope>max_slope){ //Is this the steepest slope we've seen?
max_slope = slope; //If so, make a note of the slope
max_n = n; //And which cell it came from
}
}
rec[c] = max_n; //Having considered all neighbours, this is the steepest
}
}
///The donors of a focal cell are the neighbours from which it receives flow.
///Here, we identify those neighbours by inverting the Receivers array.
void ComputeDonors(){
//The B&W method of developing the donor array has each focal cell F inform
//its receiving cell R that F is a donor of R. Unfortunately, parallelizing
//this is difficult because more than one cell might be informing R at any
//given time. Atomics are a solution, but they impose a performance cost
//(though using the latest and greatest hardware decreases this penalty).
//Instead, we invert the operation. Each focal cell now examines its
//neighbours to see if it receives from them. Each focal cell is then
//guaranteed to have sole write-access to its location in the donor array.
//Remember, the outermost ring of cells is a convenience halo, so we don't
//calculate donors for it.
#pragma omp for collapse(2) schedule(static) nowait
for(int y=1;y<height-1;y++)
for(int x=1;x<width-1;x++){
const int c = y*width+x;
ndon[c] = 0; //Cell has no donor neighbours we know about
for(int ni=0;ni<8;ni++){
const int n = c+nshift[ni];
//If the neighbour has a receiving cell and that receiving cell is
//the current focal cell c
if(rec[n]!=NO_FLOW && n+nshift[rec[n]]==c){
donor[8*c+ndon[c]] = n;
ndon[c]++;
}
}
}
}
///Cells must be ordered so that they can be traversed such that higher cells
///are processed before their lower neighbouring cells. This method creates
///such an order. It also produces a list of "levels": cells which are,
///topologically, neither higher nor lower than each other. Cells in the same
///level can all be processed simultaneously without having to worry about
///race conditions.
void GenerateOrder(
std::vector<int>& stack,
std::vector<int>& levels,
int &nlevel
){
int nstack = 0;
levels[0] = 0;
nlevel = 1;
//Outer edge
#pragma omp for schedule(static) nowait
for(int y=1;y<height-1;y++){
stack[nstack++] = y*width+1; assert(nstack<stack_width);
stack[nstack++] = y*width+(width-2); assert(nstack<stack_width);
}
#pragma omp for schedule(static) nowait
for(int x=2;x<width-2;x++){
stack[nstack++] = 1*width+x; assert(nstack<stack_width);
stack[nstack++] = (height-2)*width+x; assert(nstack<stack_width);
}
//End of outer edge
levels[nlevel++] = nstack; //Last cell of this level
//Interior cells
//TODO: Outside edge is always NO_FLOW. Maybe this can get loaded once?
//Load cells without dependencies into the queue
//TODO: Why can't I use nowait here?
#pragma omp for collapse(2) schedule(static)
for(int y=2;y<height-2;y++)
for(int x=2;x<width -2;x++){
const int c = y*width+x;
if(rec[c]==NO_FLOW){
stack[nstack++] = c;
assert(nstack<stack_width);
}
}
levels[nlevel++] = nstack; //Last cell of this level
assert(nlevel<level_width);
//Start with level_bottom=-1 so we get into the loop, it is immediately
//replaced by level_top.
int level_bottom = -1; //First cell of the current level
int level_top = 0; //Last cell of the current level
#pragma omp barrier //Must ensure ComputeDonors is done before we start the following
while(level_bottom<level_top){ //Ensure we parse all the cells
level_bottom = level_top; //The top of the previous level we considered is the bottom of the current level
level_top = nstack; //The new top is the end of the stack (last cell added from the previous level)
for(int si=level_bottom;si<level_top;si++){
const auto c = stack[si];
//Load donating neighbours of focal cell into the stack
for(int k=0;k<ndon[c];k++){
const auto n = donor[8*c+k];
stack[nstack++] = n;
assert(nstack<stack_width);
}
}
levels[nlevel++] = nstack; //Start a new level
}
//End condition for the loop places two identical entries
//at the end of the stack. Remove one.
nlevel--;
assert(levels[nlevel-1]==nstack);
}
///Compute the flow accumulation for each cell: the number of cells whose flow
///ultimately passes through the focal cell multiplied by the area of each
///cell. Each cell could also have its own weighting based on, say, average
///rainfall.
void ComputeFlowAcc(
const std::vector<int>& stack,
const std::vector<int>& levels,
const int &nlevel
){
for(int i=levels[0];i<levels[nlevel-1];i++){
const int c = stack[i];
accum[c] = cell_area;
}
//Highly-elevated cells pass their flow to less elevated neighbour cells.
//The queue is ordered so that higher cells are keyed to higher indices in
//the queue; therefore, parsing the queue in reverse ensures that fluid
//flows downhill.
//We can process the cells in each level in parallel. To prevent race
//conditions, each focal cell figures out what contirbutions it receives
//from its neighbours.
//`nlevel-1` is the upper bound of the stack.
//`nlevel-2` through `nlevel-1` are the cells which have no higher neighbours (top of the watershed)
//`nlevel-3` through `nlevel-2` are the first set of cells with higher neighbours, so this is where we start
for(int li=nlevel-3;li>=1;li--){
const int lvlstart = levels[li]; //Starting index of level in stack
const int lvlend = levels[li+1]; //Ending index of level in stack
for(int si=lvlstart;si<lvlend;si++){
const int c = stack[si];
for(int k=0;k<ndon[c];k++){
const auto n = donor[8*c+k];
accum[c] += accum[n];
}
}
}
}
///Raise each cell in the landscape by some amount, otherwise it wil get worn
///flat (in this model, with these settings)
void AddUplift(
const std::vector<int>& stack,
const std::vector<int>& levels,
const int &nlevel
){
//We exclude two exterior rings of cells in this example. The outermost ring
//(the edges of the dataset) allows us to ignore the edges of the dataset,
//the second-most outer ring (the cells bordering the edge cells of the
//dataset) are fixed to a specified height in this model. All other cells
//have heights which actively change and they are altered here.
//Start at levels[1] so we don't elevate the outer edge
#pragma omp simd
for(int i=levels[1];i<levels[nlevel-1];i++){
const int c = stack[i];
h[c] += ueq*dt;
}
}
///Decrease he height of cells according to the stream power equation; that
///is, based on a constant K, flow accumulation A, the local slope between
///the cell and its receiving neighbour, and some judiciously-chosen constants
///m and n.
/// h_next = h_current - K*dt*(A^m)*(Slope)^n
///We solve this equation implicitly to preserve accuracy
void Erode(
const std::vector<int>& stack,
const std::vector<int>& levels,
const int nlevel
){
//The cells in each level can be processed in parallel, so we loop over
//levels starting from the lower-most (the one closest to the NO_FLOW cells)
//#pragma omp parallel default(none)
for(int li=2;li<nlevel-1;li++){
const int lvlstart = levels[li];
const int lvlend = levels[li+1];
#pragma omp simd
for(int si=lvlstart;si<lvlend;si++){
const int c = stack[si]; //Cell from which flow originates
const int n = c+nshift[rec[c]]; //Cell receiving the flow
const double length = dr[rec[c]];
//`fact` contains a set of values which are constant throughout the integration
const double fact = keq*dt*std::pow(accum[c],meq)/std::pow(length,neq);
const double h0 = h[c]; //Elevation of focal cell
const double hn = h[n]; //Elevation of neighbouring (receiving, lower) cell
double hnew = h0; //Current updated value of focal cell
double hp = h0; //Previous updated value of focal cell
double diff = 2*tol; //Difference between current and previous updated values
while(std::abs(diff)>tol){ //Newton-Rhapson method (run until subsequent values differ by less than a tolerance, which can be set to any desired precision)
hnew -= (hnew-h0+fact*std::pow(hnew-hn,neq))/(1.+fact*neq*std::pow(hnew-hn,neq-1));
diff = hnew - hp; //Difference between previous and current value of the iteration
hp = hnew; //Update previous value to new value
}
h[c] = hnew; //Update value in array
}
}
}
public:
///Run the model forward for a specified number of timesteps. No new
///initialization is done. This allows the model to be stopped, the terrain
///altered, and the model continued. For space-efficiency, a number of
///temporary arrays are created each time this is run, so repeatedly running
///this function for the same model will likely not be performant due to
///reallocations. If that is your use case, you'll want to modify your code
///appropriately.
void run(const int nstep){
Tmr_Overall.start();
Tmr_Step1_Initialize.start();
//TODO: Make smaller, explain max
stack_width = std::max(300000,5*size/omp_get_max_threads()); //Number of stack entries available to each thread
level_width = std::max(1000,size/omp_get_max_threads()); //Number of level entries available to each thread
accum.resize( size); //Stores flow accumulation
rec.resize ( size); //Array of Receiver directions
ndon.resize ( size); //Number of donors each cell has
donor.resize(8*size); //Array listing the donors of each cell (up to 8 for a rectangular grid)
//! initializing rec
#pragma omp parallel for
for(int i=0;i<size;i++)
rec[i] = NO_FLOW;
#pragma omp parallel for
for(int i=0;i<size;i++)
ndon[i] = 0;
#pragma omp parallel
{
std::vector<int> stack(stack_width); //Indices of cells in the order they should be processed
//A level is a set of cells which can all be processed simultaneously.
//Topologically, cells within a level are neither descendents or ancestors
//of each other in a topological sorting, but are the same number of steps
//from the edge of the dataset.
//It's difficult to know how much memory should be allocated for levels. For
//a square DEM with isotropic dispersion this is approximately sqrt(E/2). A
//diagonally tilted surface with isotropic dispersion may have sqrt(E)
//levels. A tortorously sinuous river may have up to E*E levels. We
//compromise and choose a number of levels equal to the perimiter because
//why not?
std::vector<int> levels(level_width);
int nlevel = 0;
Tmr_Step1_Initialize.stop();
for(int step=0;step<=nstep;step++){
Tmr_Step2_DetermineReceivers.start (); ComputeReceivers (); Tmr_Step2_DetermineReceivers.stop ();
Tmr_Step3_DetermineDonors.start (); ComputeDonors (); Tmr_Step3_DetermineDonors.stop ();
Tmr_Step4_GenerateOrder.start (); GenerateOrder (stack,levels,nlevel); Tmr_Step4_GenerateOrder.stop ();
Tmr_Step5_FlowAcc.start (); ComputeFlowAcc (stack,levels,nlevel); Tmr_Step5_FlowAcc.stop ();
Tmr_Step6_Uplift.start (); AddUplift (stack,levels,nlevel); Tmr_Step6_Uplift.stop ();
Tmr_Step7_Erosion.start (); Erode (stack,levels,nlevel); Tmr_Step7_Erosion.stop ();
#pragma omp barrier //Ensure threads synchronize after erosion so we calculate receivers correctly
#pragma omp master
if( step%20==0 ) //Show progress
std::cout<<"p Step = "<<step<<std::endl;
}
}
Tmr_Overall.stop();
std::cout<<"t Step1: Initialize = "<<std::setw(15)<<Tmr_Step1_Initialize.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step2: DetermineReceivers = "<<std::setw(15)<<Tmr_Step2_DetermineReceivers.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step3: DetermineDonors = "<<std::setw(15)<<Tmr_Step3_DetermineDonors.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step4: GenerateOrder = "<<std::setw(15)<<Tmr_Step4_GenerateOrder.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step5: FlowAcc = "<<std::setw(15)<<Tmr_Step5_FlowAcc.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step6: Uplift = "<<std::setw(15)<<Tmr_Step6_Uplift.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step7: Erosion = "<<std::setw(15)<<Tmr_Step7_Erosion.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Overall = "<<std::setw(15)<<Tmr_Overall.elapsed() <<" microseconds"<<std::endl;
//Free up memory, except for the resulting landscape height field prior to
//exiting so that unnecessary space is not used when the model is not being
//run.
accum .clear(); accum .shrink_to_fit();
rec .clear(); rec .shrink_to_fit();
ndon .clear(); ndon .shrink_to_fit();
donor .clear(); donor .shrink_to_fit();
}
///Returns a pointer to the data so that it can be copied, printed, &c.
std::vector<double>& getH() {
return h;
}
};
int main(int argc, char **argv){
//Enable this to stop the program if a floating-point exception happens
//feenableexcept(FE_ALL_EXCEPT);
if(argc!=5){
std::cerr<<"Syntax: "<<argv[0]<<" <Dimension> <Steps> <Output Name> <Seed>"<<std::endl;
return -1;
}
const int width = std::stoi (argv[1]);
const int height = std::stoi (argv[1]);
const int nstep = std::stoi (argv[2]);
const std::string output_name = argv[3] ;
const auto rand_seed = std::stoul(argv[4]);
seed_rand(rand_seed);
//Uses the RichDEM machine-readable line prefixes
//Name of algorithm
std::cout<<"A FastScape RB+PQ"<<std::endl;
//Citation for algorithm
std::cout<<"C Richard Barnes TODO"<<std::endl;
//Git hash of code used to produce outputs of algorithm
std::cout<<"h git_hash = "<<GIT_HASH<<std::endl;
//Random seed used to produce outputs
std::cout<<"m Random seed = "<<rand_seed<<std::endl;
CumulativeTimer tmr(true);
FastScape_RBPQ tm(width,height);
tm.run(nstep);
std::cout<<"t Total calculation time = "<<std::setw(15)<<tmr.elapsed()<<" microseconds"<<std::endl;
PrintDEM(output_name, tm.getH(), width, height);
return 0;
}