-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathhenry_gpu.cu
273 lines (230 loc) · 10.2 KB
/
henry_gpu.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <string>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <map>
#include <curand_kernel.h>
/* include MTGP host helper functions */
#include <curand_mtgp32_host.h>
/* include MTGP pre-computed parameter sets */
#include <curand_mtgp32dc_p_11213.h>
#define CUDA_CALL(x) do { if((x) != cudaSuccess) { \
printf("Error %s at %s:%d\n",cudaGetErrorString(x), __FILE__, __LINE__); \
return EXIT_FAILURE;}} while(0)
#define CURAND_CALL(x) do { if((x) != CURAND_STATUS_SUCCESS) { \
printf("Error at %s:%d\n",__FILE__,__LINE__); \
return EXIT_FAILURE;}} while(0)
#define NUMTHREADS 256 // number of threads per GPU block
// data for atom of crystal structure
// Unit cell of crystal structure can then be stored
// as pointer array of StructureAtom's
struct StructureAtom {
// Cartesian position, units: A
double x;
double y;
double z;
// Lennard-Jones epsilon parameter with adsorbate
double epsilon; // units: K
// Lennard-Jones sigma parameter with adsorbate
double sigma; // units: A
};
// temperature, Kelvin
const double T = 298.0;
// Universal gas constant, m3 - Pa / (K - mol)
const double R = 8.314;
// Compute the Boltzmann factor of methane at point (x, y, z) inside structure
// Loop over all atoms of unit cell of crystal structure
// Find nearest image to methane at point (x, y, z) for application of periodic boundary conditions
// Compute energy contribution due to this atom via the Lennard-Jones potential
__device__ double ComputeBoltzmannFactorAtPoint(double x, double y, double z,
const StructureAtom * __restrict__ structureatoms,
double natoms,
double L) {
// (x, y, z) : Cartesian coords of methane molecule
// structureatoms : pointer array storing info on unit cell of crystal structure
// natoms : number of atoms in crystal structure
// L : box length
// returns Boltzmann factor e^{-E/(RT)}
double E = 0.0; // energy (K)
// loop over atoms in crystal structure
for (int i = 0; i < natoms; i++) {
// Compute distance in each coordinate from (x, y, z) to this structure atom
double dx = x - structureatoms[i].x;
double dy = y - structureatoms[i].y;
double dz = z - structureatoms[i].z;
// apply nearest image convention for periodic boundary conditions
const double boxupper = 0.5 * L;
const double boxlower = -0.5 * L;
dx = (dx > boxupper) ? dx-L : dx;
dx = (dx > boxupper) ? dx-L : dx;
dy = (dy > boxupper) ? dy-L : dy;
dy = (dy <= boxlower) ? dy-L : dy;
dz = (dz <= boxlower) ? dz-L : dz;
dz = (dz <= boxlower) ? dz-L : dz;
// compute inverse distance
double rinv = rsqrt(dx*dx + dy*dy + dz*dz);
// Compute contribution to energy of adsorbate at (x, y, z) due to this atom
// Lennard-Jones potential (this is the efficient way to compute it)
double sig_ovr_r = rinv * structureatoms[i].sigma;
double sig_ovr_r6 = pow(sig_ovr_r, 6);
double sig_ovr_r12 = sig_ovr_r6 * sig_ovr_r6;
E += 4.0 * structureatoms[i].epsilon * (sig_ovr_r12 - sig_ovr_r6);
}
return exp(-E / (R * T)); // return Boltzmann factor
}
// Inserts a methane molecule at a random position inside the structure
// Calls function to compute Boltzmann factor at this point
// Stores Boltzmann factor computed at this thread in boltzmannFactors
__global__ void PerformInsertions(curandStateMtgp32 *state,
double *boltzmannFactors,
const StructureAtom * __restrict__ structureatoms,
int natoms, double L) {
// state : random number generator
// boltzmannFactors : pointer array in which to store computed Boltzmann factors
// structureatoms : pointer array storing atoms in unit cell of crystal structure
// natoms : number of atoms in crystal structure
// L : box length
int id = threadIdx.x + blockIdx.x * NUMTHREADS;
// Generate random position inside the cubic unit cell of the structure
double x = L * curand_uniform_double(&state[blockIdx.x]);
double y = L * curand_uniform_double(&state[blockIdx.x]);
double z = L * curand_uniform_double(&state[blockIdx.x]);
// Compute Boltzmann factor, store in boltzmannFactors
boltzmannFactors[id] = ComputeBoltzmannFactorAtPoint(x, y, z, structureatoms, natoms, L);
}
int main(int argc, char *argv[]) {
// take in number of MC insertions as argument
if (argc != 2) {
printf("Run as:\n./henry ninsertions\n");
exit(EXIT_FAILURE);
}
int ncycles = atoi(argv[1]); // Number of Monte Carlo insertions
//
// Energetic model for interactions of methane molecule with atoms of framework
// pairwise Lennard-Jones potentials
//
// Epsilon parameters for Lennard-Jones potential (K)
std::map<std::string, double> epsilons;
epsilons["Zn"] = 96.152688;
epsilons["O"] = 66.884614;
epsilons["C"] = 88.480032;
epsilons["H"] = 57.276566;
// Sigma parameters for Lennard-Jones potential (A)
std::map<std::string, double> sigmas;
sigmas["Zn"] = 3.095775;
sigmas["O"] = 3.424075;
sigmas["C"] = 3.580425;
sigmas["H"] = 3.150565;
//
// Import unit cell of nanoporous material IRMOF-1
//
StructureAtom *structureatoms; // store atoms in pointer array here
// open crystal structure file
std::ifstream materialfile("IRMOF-1.cssr");
if (materialfile.fail()) {
printf("IRMOF-1.cssr failed to import.\n");
exit(EXIT_FAILURE);
}
// read cubic box dimensions
std::string line;
getline(materialfile, line);
std::istringstream istream(line);
double L; // dimension of cube
istream >> L;
printf("L = %f\n", L);
// waste line
getline(materialfile, line);
// get number of atoms
getline(materialfile, line);
int natoms; // number of atoms in unit cell of IRMOF-1
istream.str(line);
istream.clear();
istream >> natoms;
printf("%d atoms\n", natoms);
// waste line
getline(materialfile, line);
// Allocate space for material atoms and epsilons/sigmas on both host and device
// using Unified Memory
CUDA_CALL(cudaMallocManaged(&structureatoms, natoms * sizeof(StructureAtom)));
// read atom coordinates
for (int i = 0; i < natoms; i++) {
// read atoms from .cssr file
getline(materialfile, line);
istream.str(line);
istream.clear();
int atomno;
double xf, yf, zf; // fractional coordinates
std::string element;
istream >> atomno >> element >> xf >> yf >> zf;
// load structureatoms with Cartesian coordinates of this atom
structureatoms[i].x = L * xf;
structureatoms[i].y = L * yf;
structureatoms[i].z = L * zf;
// store epsilon and sigma Lennard-Jones parameters as well, for ease of computation on device
structureatoms[i].epsilon = epsilons[element];
structureatoms[i].sigma = sigmas[element];
// printf("%d. %s, (%f, %f, %f), eps = %f, sig = %f\n",
// atomno, element.c_str(),
// structureatoms[i].x, structureatoms[i].y, structureatoms[i].z,
// structureatoms[i].epsilon,
// structureatoms[i].sigma);
}
// Determine number of blocks for high occupancy: 4 per multiprocessor
int deviceId = -1;
int nMultiprocessors = 0;
CUDA_CALL(cudaGetDevice(&deviceId));
CUDA_CALL(cudaDeviceGetAttribute(&nMultiprocessors, cudaDevAttrMultiProcessorCount, deviceId));
int nBlocks = nMultiprocessors * 4;
// calculate number of MC insertions
int ninsertions = ncycles * nBlocks * NUMTHREADS;
//
// Allocate space for storing Boltzmann factors computed on each thread using unified memory
//
double * boltzmannFactors;
CUDA_CALL(cudaMallocManaged(&boltzmannFactors, nBlocks * NUMTHREADS * sizeof(double)));
//
// Set up random number generator on device
//
curandStateMtgp32 *devMTGPStates;
mtgp32_kernel_params *devKernelParams;
// Allocate space for prng states on device. One per block
CUDA_CALL(cudaMalloc((void **) &devMTGPStates, nBlocks * sizeof(curandStateMtgp32)));
// Setup MTGP prng states
// Allocate space for MTGP kernel parameters
CUDA_CALL(cudaMalloc((void**) &devKernelParams, sizeof(mtgp32_kernel_params)));
// Reformat from predefined parameter sets to kernel format,
// and copy kernel parameters to device memory
CURAND_CALL(curandMakeMTGP32Constants(mtgp32dc_params_fast_11213, devKernelParams));
// Initialize one state per thread block
CURAND_CALL(curandMakeMTGP32KernelState(devMTGPStates, mtgp32dc_params_fast_11213, devKernelParams, nBlocks, 1234));
// State setup is complete
//
// Compute the Henry coefficient
// KH = < e^{-E/(kB * T)} > / (R * T)
// Brackets denote average over space
//
double KH = 0.0; // will be Henry coefficient
for (int cycle = 0; cycle < ncycles; cycle++) {
// Perform Monte Carlo insertions in parallel on the GPU
PerformInsertions<<<nBlocks, NUMTHREADS>>>(devMTGPStates, boltzmannFactors, structureatoms, natoms, L);
cudaDeviceSynchronize();
// Compute Henry coefficient from the sampled Boltzmann factors (will be average Boltzmann factor divided by RT)
for(int i = 0; i < nBlocks * NUMTHREADS; i++)
KH += boltzmannFactors[i];
}
KH = KH / ninsertions; // --> average Boltzmann factor
// at this point KH = < e^{-E/(kB/T)} >
KH = KH / (R * T); // divide by RT
printf("Used %d blocks with %d thread each\n", nBlocks, NUMTHREADS);
printf("Henry constant = %e mol/(m3 - Pa)\n", KH);
printf("Number of actual insertions: %d\n", ninsertions);
printf("Number of times we called the GPU kernel: %d\n", ncycles);
// Clean-up
CUDA_CALL(cudaFree(devMTGPStates));
CUDA_CALL(cudaFree(structureatoms));
CUDA_CALL(cudaFree(boltzmannFactors));
return EXIT_SUCCESS;
}