-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdfgetx3_utils.py
486 lines (369 loc) · 14.8 KB
/
pdfgetx3_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# Some utility functions for running pdfgetx3
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from sklearn.metrics import mean_squared_error
import subprocess
from textwrap import dedent
def run(commands, cwd='./'):
'''Run commands with subprocess'''
if not isinstance(commands, list):
commands = [commands]
for cmd in commands:
subprocess.run(cmd, cwd=cwd, shell=True)
def get_data(file, comments='#', skiprows=0):
'''
Get data from a dat file
file : str, file
'''
data = np.loadtxt(file, comments=comments, skiprows=skiprows)
x = data[:,0]
y = data[:,1]
return x, y
def g2G(r, g_r, rho0):
'''
Convert g(r) to G(r), the weighted radial distribution function to the reduced pair distribution function
G(r) = 4 pi r / rho0 ( g(r) - 1 )
r : array-like, Angstroms
g_r : array-like, same shape as r
rho0 : float, number density = N/V
'''
return 4*np.pi*r/rho0 * (g_r-1)
def G2g(r, G_r, rho0):
'''
Convert G(r) to g(r), inverse of g2G()
g(r) = G(r) rho0 / (4 pi r) + 1
r : array-like, Angstroms
G_r : array-like, same shape as r
rho0 : float, number density = N/V
'''
return G_r*rho0/4/np.pi/r + 1
def write_G_r(r, G_r, filename='output.gr'):
'''
Write data to file
r : array-like, Angstroms
G_r : array-like, same shape as r
filename : str, file
'''
data = np.vstack((r, G_r)).T
np.savetxt(filename, data, fmt='%.8f', delimiter='\t', header='r (A)\tG(r)')
def plot_G_r(r, G_r, label=None, xticks=(1,0.25), yticks=(0.1,0.025), xlims=None, ylims=None, alpha=1, savename=None, ax=None):
'''Plot G(r) nicely with tick marks'''
if ax is None:
fig, ax = plt.subplots(1,1, figsize=(8,6))
plt.plot(r, G_r, label=label, alpha=alpha)
plt.xlabel('r ($\mathrm{\AA}$)', fontsize=14)
plt.ylabel('G(r)', fontsize=14)
plt.legend(fontsize=14)
if xlims is not None:
plt.xlim(xlims)
if ylims is not None:
plt.ylim(ylims)
ax.xaxis.set_major_locator(MultipleLocator(xticks[0]))
ax.xaxis.set_minor_locator(MultipleLocator(xticks[1]))
ax.yaxis.set_major_locator(MultipleLocator(yticks[0]))
ax.yaxis.set_minor_locator(MultipleLocator(yticks[1]))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
if savename is not None:
plt.savefig(savename)
return ax
def plot_g_r(r, g_r, label=None, alpha=1, xticks=(1,0.25), yticks=(0.5,0.1), ylims=None, xlims=None, color=None, ax=None, output=None):
'''
Plot PDF nicely from r and g(r)
r : units Angstroms
'''
if ax is None:
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(r, g_r, label=label, alpha=alpha, c=color)
ax.xaxis.set_major_locator(MultipleLocator(xticks[0]))
ax.xaxis.set_minor_locator(MultipleLocator(xticks[1]))
ax.yaxis.set_major_locator(MultipleLocator(yticks[0]))
ax.yaxis.set_minor_locator(MultipleLocator(yticks[1]))
if ylims is None:
plt.ylim(0, g_r.max() + 1)
else:
plt.ylim(ylims[0], ylims[1])
if xlims is None:
plt.xlim(0, r.max())
else:
plt.xlim(xlims[0], xlims[1])
plt.ylabel('g(r)')
plt.xlabel('r ($\mathrm{\AA}$)')
if label is not None:
plt.legend()
if output is not None:
plt.savefig(output)
return ax
def objective_function_wet(x, input_wet='avgd_PA_wet_11p5min_getx3.qq.txt', input_dry='avgd_PA_dry_11p5min_getx3.qq.txt', bg_wet='avgd_water_11p5min_getx3.qq.txt', bg_dry=None, comp_wet='C4451.85H5687.94O2132.77N731.62', comp_dry='C17.7N2.9H11.8O3.1', output_wet='tmp_wet.qq.gr', output_dry='tmp_dry.qq.gr'):
'''
Objective function for the optimization problem
Input (x) : bgscale, rpoly, qmin, qmax
Output : MSE between G_r_wet and G_r_dry for r values >= rpoly
'''
# bgscale, rpoly, qmin, qmax = x
bgscale = float(x)
rpoly = 2
qmin = 0.3
qmax = 16
# standard configuration settings
cfg = Config()
cfg.outputtype = ['gr']
# cfg.outputtype = ['iq']
cfg.plot = ['none']
cfg.verbose = 0
# variable configuration settings
cfg.bgscale = bgscale
cfg.rpoly = rpoly
cfg.qmin = qmin
cfg.qmax = qmax
# system specific configuration settings
cfg.backgroundfile = bg_wet
cfg.composition = comp_wet
cfg.output = output_wet
cfg.run(inputs=[input_wet], config_name='tmp_wet.cfg')
r_wet, G_r_wet = get_data(output_wet, skiprows=27)
# q_wet, I_q_wet = get_data(output_wet, skiprows=27)
cfg.backgroundfile = bg_dry
cfg.composition = comp_dry
cfg.output = output_dry
cfg.run(inputs=[input_dry], config_name='tmp_dry.cfg')
r_dry, G_r_dry = get_data(output_dry, skiprows=27)
# q_dry, I_q_dry = get_data(output_dry, skiprows=27)
G_r_wet = G_r_wet[r_wet >= rpoly]
G_r_dry = G_r_dry[r_dry >= rpoly]
# I_q_wet = I_q_wet[1:]
run('rm tmp*')
return mean_squared_error(G_r_dry, G_r_wet)
# return mean_squared_error(I_q_dry, I_q_wet)
class Config:
def __init__(self):
# initialize all the options
self.filename = 'pdfgetx3.cfg'
# input and output options
self.parameters = 'DEFAULT'
self.dataformat = 'QA'
self.inputfile = []
self.backgroundfile = None
self.formfactorfile = None
self.datapath = []
self.output = '@b.@o'
self.outputtype = ['none']
self.force = False
# PDF parameters
self.bgscale = 1
self.mode = 'xray'
self.wavelength = 0.2116
self.twothetazero = 0
self.composition = 'C4451.85H5687.94O2132.77N731.62' # default is for wet PA from simulation
self.rpoly = 2
self.qmaxinst = 23.0
self.qmin = 0.3
self.qmax = 16
self.rmin = 0.0
self.rmax = 10
self.rstep = 0.02
# other parameters
self.plot = ['none']
self.interact = 'no'
self.verbose = 5
def write(self, filename=None):
'''Write out the pdfgetx3 config file'''
if filename is None:
filename = self.filename
self.filename = filename
out = open(filename, 'w')
header = dedent('''\
## Template configuration file for pdfgetx3.
## pdfgetx3 searches for default configurations in ~/.pdfgetx3.cfg,
## .pdfgetx3.cfg, pdfgetx3.cfg and then loads a custom configuration
## if specified via the "-c" option. You can run
##
## pdfgetx3 --verbose=info
##
## to verify how and from what file are the parameters set.
## The default section -------------------------------------------------------
''')
out.write(header)
defaults = dedent(f'''\
## Parameters defined here are also available in custom sections below.
[{self.parameters}]
''')
out.write(defaults)
dataformat = dedent(f'''\
## Format of input files. Available formats are: "twotheta", "QA", "Qnm"
## corresponding to a 2-column text data where the first column is either
## twotheta in degrees, Q in inverse Angstroms or Q in inverse nanometers.
dataformat = {self.dataformat}
''')
out.write(dataformat)
input_intensities = dedent(f'''\
## One or more input xray intensities from the sample. This setting is
## ignored if there are any files provided on the command line. Several
## files need to be specified as one file per line.
''')
for inp in self.inputfile:
input_intensities += f'{inp}\n'
out.write(input_intensities)
backgroundfile = dedent(f'''\
## Optional background intensities from container and air scattering
''')
if self.backgroundfile is not None:
backgroundfile += f'backgroundfile = {self.backgroundfile}\n'
backgroundfile += '\n'
out.write(backgroundfile)
add_dir = dedent(f'''\
## Additional directories to be searched for input files, one per line.
''')
for dp in self.datapath:
add_dir += f'{dp}\n'
out.write(add_dir)
bgscale = dedent(f'''\
## Optional scaling of the background intensities. By default 1.
bgscale = {self.bgscale}
''')
out.write(bgscale)
formfactors = dedent(f'''\
## Form factor intensities of the scatterers. This is required for `sas`
## mode. The form factor file is expected to be in two-column format
## with (Q, f2avg) data or three-column format with
## (Q, f2avg, favg2) data. The unit of Q is required to be A^-1.
''')
if self.formfactorfile is not None:
formfactors += f'formfactorfile = {self.formfactorfile}\n'
formfactors += '\n'
out.write(formfactors)
outputs = dedent(f'''\
## Output file name, write to the standard output when "-".
## This may contain @f, @h, @r, @e, @t, @b, @o tokens which expands as follows:
##
## @f dir1/dir2/filename.dat input file path
## @h dir1/dir2 input file head directory or '.'
## @r dir1/dir2/filename input path with extension removed
## @e dat input file extension without '.'
## @t filename.dat tail component of the input file
## @b filename tail component with extension removed
## @o gr output extension iq, sq, fq or gr
##
## An empty value works the same as "@b.@o", i.e., saves the data
## in the current directory with a proper data type extension.
output = {self.output}
''')
out.write(outputs)
outputtypes = dedent(f'''\
## Types of output files to be saved. Possible values are
## "iq", "sq", "fq", "gr", also used as filename extensions.
## No files are saved when empty, "none" or "NONE".
''')
line = 'outputtype ='
for ot in self.outputtype:
line += f' {ot}'
outputtypes += line + '\n'
out.write(outputtypes)
forces = dedent(f'''\
## Flag for overwriting existing output files. By default False.
## It is probably safer to use it from command line.
force = {self.force}
''')
out.write(forces)
mode = dedent(f'''\
## The PDF calculator configuration mode or a name of the calculator
## setup. The available modes correspond to the radiation type.
## The supported modes are:
## 'xray', 'neutron', 'sas'
mode = {self.mode}
''')
out.write(mode)
wavelength = dedent(f'''\
## X-ray, neutron, or electron wavelength in Angstroms.
## Required for the "twotheta" dataformat.
wavelength = {self.wavelength}
''')
out.write(wavelength)
twothetazero = dedent(f'''\
## Position of the zero scattering angle in diffractometer degrees.
## Applies only for the "twotheta" dataformat.
twothetazero = {self.twothetazero}
''')
out.write(twothetazero)
composition = dedent(f'''\
## Chemical composition of the measured sample. Supported formats are
## "PbTi0.5Zr0.5O3", "Pb (TiZr)0.5 O3" or "Pb 1 (Ti Zr) 1/2 O 3".
## Space characters are ignored, unit counts can be omitted, but it is
## important to use a proper upper and lower case in atom symbols.
## Elements can appear several times in the formula, e.g., "CH3 CH3",
## and the formula may contain parentheses or fractional counts.
composition = {self.composition}
''')
out.write(composition)
rpoly = dedent(f'''\
## r-limit for the maximum frequency in the F(Q) correction polynomial.
## The PDF is unreliable at shorter r, however a too small rpoly
## disables polynomial correction and yields noisy PDF. Too large
## values may smooth-out useful signal in the data.
rpoly = {self.rpoly}
''')
out.write(rpoly)
qmaxinst = dedent(f'''\
## The Q cutoff for the meaningful input intensities in inverse Angstroms.
## This is the upper boundary for the qmax parameter. It is also used as
## the upper boundary for the polynomial fits in S(Q) corrections.
qmaxinst = {self.qmaxinst}
''')
out.write(qmaxinst)
qmin = dedent(f'''
## Lower Q cutoff for Fourier transformation in inverse Angstroms.
## Use 0.0 when not specified.
qmin = {self.qmin}
''')
out.write(qmin)
qmax = dedent(f'''\
## Upper Q cutoff for Fourier transformation in inverse Angstroms.
## Use maximum Q in the data when not specified.
qmax = {self.qmax}
''')
out.write(qmax)
rrange = dedent(f'''\
## Limits and spacing for the calculated PDF r-grid.
## All values in Angstroms.
rmin = {self.rmin}
rmax = {self.rmax}
rstep = {self.rstep}
''')
out.write(rrange)
plot = dedent(f'''\
## Plot the specified results and activate interactive mode.
## A comma separated list with items such as "iq", "sq", "fq", "gr".
## No plot is produced when empty, "none" or "NONE".
''')
line = 'plot ='
for p in self.plot:
line += f' {p}'
plot += line + '\n'
out.write(plot)
interact = dedent(f'''\
## Start an IPython interactive session after processing all files.
## Useful for tuning the configuration parameters or interactive plotting.
## This is always on when plot option has been set.
interact = {self.interact}
''')
out.write(interact)
verbosity = dedent(f'''\
## Program verbosity - the minimum priority of the messages to be displayed.
## Possible values are (error, warning, info, debug, all) or an integer from
## 0 to 5, where 1 corresponds to error, 2 to warning and so forth.
verbose = {self.verbose}
''')
out.write(verbosity)
if self.verbose != 0:
print(f'Wrote config file to {filename}!')
def get_composition_from_MD(self, coord):
'''Use MD coordinate file to calculate the composition for the experimental PDF'''
pass
def run(self, inputs=[], config_name='tmp.cfg'):
'''Run pdfgetx3 with the parameters in self'''
self.write(config_name)
cmd = f'pdfgetx3 -c {config_name}'
for inp in inputs:
cmd += f' {inp}'
run(cmd)