Merge pull request #12 from EPCCed/training-implementation
Training implementation
Showing 29 changed files with 590 additions and 515 deletions.
@@ -0,0 +1,11 @@
# ML Model Training

Following on from the [data generation phase](data-generation.md) of our implementation for the Hasegawa-Wakatani example, this page describes how we train our ML models.

1. Error calculation.

   We now have fine-grained simulation trajectories; from those we have extracted data for each timestep, coarsened that data, and run single-timestep coarse-grained simulations.

   The task now is to take the difference between timestep 1 and timestep 0 of those coarse-grained simulations, as sketched below.
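   A minimal sketch of that difference for a single coarse-grained simulation (the trajectory path is a placeholder, matching the layout used by the conversion script later in this commit):

      # hedged sketch: the change over one coarse timestep of one simulation
      from xbout import open_boutdataset

      ds = open_boutdataset('trajectory_1/0/coarse_sim/BOUT.dmp.*.nc', info=False)
      dvort = ds['vort'][1] - ds['vort'][0]  # timestep 1 minus timestep 0
      dn = ds['n'][1] - ds['n'][0]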
UNFINISHED - email [email protected] if you get this far!!
7 files renamed without changes.
@@ -0,0 +1,52 @@
#!/usr/bin/env python
# Script to generate training .nc files from BOUT++ coarse simulation files
# Based on traj_netcdf.ipynb
import sys

import xarray as xr
from xbout import open_boutdataset
from tqdm import trange

basedir = '/scratch/space1/x01/data/my-scratch-data'
outdir = '/scratch/space1/x01/data/my-scratch-data/training/training_nc'

def read_traj(traj):
    """Collect timestep-0 (coarsened ground truth) and timestep-1 (one-step
    prediction) fields from every coarse simulation of one trajectory."""
    dvort0 = []
    dvort1 = []
    dn0 = []
    dn1 = []
    for i in trange(0, 1001):
        ds = open_boutdataset(
            f'{basedir}/trajectory_{traj}/{i}/coarse_sim/BOUT.dmp.*.nc',
            info=False)
        dvort0.append(ds['vort'][0, :, :, :])
        dvort1.append(ds['vort'][1, :, :, :])
        dn0.append(ds['n'][0, :, :, :])
        dn1.append(ds['n'][1, :, :, :])
    # ground truth: the coarsened initial conditions of simulations 1..1000
    tvort0 = xr.concat(dvort0[1:], 't')
    tn0 = xr.concat(dn0[1:], 't')
    # predictions: the fields after one coarse timestep of each simulation
    tvort1 = xr.concat(dvort1[:1001], 't')
    tn1 = xr.concat(dn1[:1001], 't')
    d0 = xr.merge([tvort0, tn0])
    d1 = xr.merge([tvort1, tn1])
    return d0, d1

def clean(ds):
    """Drop the BOUT++ attributes that cannot be serialised to netCDF."""
    if 'metadata' in ds.attrs:
        del ds.attrs['metadata']
    if 'options' in ds.attrs:
        del ds.attrs['options']
    for variable in ds.variables.values():
        if 'metadata' in variable.attrs:
            del variable.attrs['metadata']
        if 'options' in variable.attrs:
            del variable.attrs['options']

traj = sys.argv[1]
d0, d1 = read_traj(traj)
clean(d0)
clean(d1)
d0.to_netcdf(f'{outdir}/gt_traj_{traj}.nc')
d1.to_netcdf(f'{outdir}/sim_traj_{traj}.nc')
#err = d0 - d1
#err.to_netcdf(f'{outdir}/err_traj_{traj}.nc')
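The commented-out lines point at the eventual use: once gt_traj_*.nc and sim_traj_*.nc exist, the error target is their difference. A usage sketch (output paths assumed):

    # sketch: form the one-step error target from the script's two outputs
    import xarray as xr

    gt = xr.open_dataset('training_nc/gt_traj_1.nc')    # coarsened ground truth
    sim = xr.open_dataset('training_nc/sim_traj_1.nc')  # one-step coarse predictions
    err = gt - sim
    err.to_netcdf('training_nc/err_traj_1.nc')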
@@ -0,0 +1,16 @@
#!/bin/bash

#SBATCH --nodes=1
#SBATCH --ntasks=1
# #SBATCH --exclusive
#SBATCH --time=01:00:00
#SBATCH --partition=standard
#SBATCH --qos=standard
#SBATCH --account=<account>

eval "$(/work/x01/x01/$USER/miniconda3/bin/conda shell.bash hook)"
conda activate boutsmartsim

# index of the trajectory to convert
TRAJECTORY=1

python gen_training_nc.py $TRAJECTORY
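The batch script converts one trajectory at a time; a job-array variant could cover them all (a sketch only, with the index range 1-10 assumed):

    #!/bin/bash
    # Sketch: convert several trajectories via a SLURM job array (range assumed)
    #SBATCH --nodes=1
    #SBATCH --ntasks=1
    #SBATCH --time=01:00:00
    #SBATCH --partition=standard
    #SBATCH --qos=standard
    #SBATCH --account=<account>
    #SBATCH --array=1-10

    eval "$(/work/x01/x01/$USER/miniconda3/bin/conda shell.bash hook)"
    conda activate boutsmartsim

    python gen_training_nc.py $SLURM_ARRAY_TASK_ID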
@@ -0,0 +1 @@
Training pipeline for the error-correction ML model.
@@ -0,0 +1,69 @@
""" Functions to load/augment the training dataset """
import tensorflow as tf
import numpy as np
import netCDF4 as nc

from typing import Dict, List

def extract_array_data(file_path: str, args) -> np.ndarray:
    dataset = nc.Dataset(file_path, 'r')

    # number of ghost cells in the x dimension
    gx = 2
    # extract vorticity and density without ghost cells and remove the unit y direction
    vort_array = np.squeeze(dataset.variables['vort'][:, gx:-gx, :, :])
    dens_array = np.squeeze(dataset.variables['n'][:, gx:-gx, :, :])
    dataset.close()

    if args.vort_only:
        flow_image = np.stack([vort_array], axis=-1)
    elif args.dens_only:
        flow_image = np.stack([dens_array], axis=-1)
    else:
        flow_image = np.stack([vort_array, dens_array], axis=-1)
    return flow_image

def translate_augmentation(fields: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
    coarse_image, error_image = fields['coarse'], fields['error']
    # commented out for testing
    #if coarse_image.shape != error_image.shape:
    #    raise ValueError(f"Coarse-grained data and error should have the same shape "
    #                     f"(got {coarse_image.shape} and {error_image.shape} respectively).")
    shape = tf.shape(coarse_image)
    nx, nz = shape[0], shape[1]
    # maxval is exclusive, so this samples shifts in [0, nx) and [0, nz)
    shift_x = tf.random.uniform(shape=[], minval=0, maxval=nx, dtype=tf.int32)
    shift_z = tf.random.uniform(shape=[], minval=0, maxval=nz, dtype=tf.int32)

    # apply the same shift to the coarse snapshot and the error
    coarse_shifted = tf.roll(coarse_image, shift_x, 0)
    coarse_shifted = tf.roll(coarse_shifted, shift_z, 1)
    error_shifted = tf.roll(error_image, shift_x, 0)
    error_shifted = tf.roll(error_shifted, shift_z, 1)
    return {'coarse': coarse_shifted, 'error': error_shifted}

def data_generator(ground_truth_file_names: List[str], coarse_grained_file_names: List[str], args):
    for gt_file, cg_file in zip(ground_truth_file_names, coarse_grained_file_names):
        raw_data_gt = extract_array_data(gt_file, args)
        # drop the first coarse snapshot so the frames line up with the ground truth
        raw_data_cg = extract_array_data(cg_file, args)[1:]
        error = raw_data_gt - raw_data_cg

        # convert to tensors so downstream ops see dynamic dimensions
        raw_data_cg = tf.convert_to_tensor(raw_data_cg, dtype=tf.float64)
        error = tf.convert_to_tensor(error, dtype=tf.float64)
        for i in range(raw_data_cg.shape[0]):
            yield {'coarse': raw_data_cg[i], 'error': error[i]}

def generate_augmented_dataset(
    ground_truth_file_names: List[str],
    coarse_grained_file_names: List[str],
    args,
) -> tf.data.Dataset:
    if args.vort_only or args.dens_only:
        channels = 1
    else:
        channels = 2
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(ground_truth_file_names, coarse_grained_file_names, args),
        output_signature={'coarse': tf.TensorSpec(shape=(None, None, channels), dtype=tf.float64),
                          'error': tf.TensorSpec(shape=(None, None, channels), dtype=tf.float64)}
    )
    return dataset.map(translate_augmentation)
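A usage sketch for this module (the data module name, file paths, and the args namespace are assumptions, mirroring the flags used by training.py below):

    # sketch: build a batched, augmented training dataset from the generated files
    import glob
    from types import SimpleNamespace

    import data  # assuming the module above is saved as data.py

    args = SimpleNamespace(vort_only=False, dens_only=False)
    gt_files = sorted(glob.glob('training_nc/gt_traj_*.nc'))
    cg_files = sorted(glob.glob('training_nc/sim_traj_*.nc'))

    ds = data.generate_augmented_dataset(gt_files, cg_files, args)
    ds = ds.map(lambda d: (d['coarse'], d['error'])).batch(32)
    for coarse, error in ds.take(1):
        print(coarse.shape, error.shape)  # (32, nx, nz, 2) with both channels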
@@ -0,0 +1,82 @@
from tensorflow.keras.layers import Input, Conv2D
from tensorflow import keras
from typing import Tuple

import padding

'''
preprocessing:
- "cyclic" padding along the flow direction (the wall dimension is 0-padded):
  the input is padded on both sides with columns that 'wrap' around (see np.pad('wrap'))
  and with rows of zeros on top and bottom
  ! needs to be reapplied after every convolution layer
'''

'''
TODO/preprocessing:
- rescale the input to [-1, 1]:
  to rescale an input in the [0, 255] range to the [-1, 1] range,
  you would pass scale=1./127.5, offset=-1.
  in general: scale = scaled_max / (max * .5)
              offset = min + scaled_min
  keras.layers.Rescaling(scale, offset=0.0, **kwargs)
  keras seems not to support min-max scaling with variable min/max, so we will normalise instead
'''

def kochkov_cnn(image_shape: Tuple[int]) -> keras.Model:
    """Six cyclically padded 3x3 convolution layers plus a linear output layer.
    TODO: need a more automated way of padding, e.g. if we adjust the filter size."""
    model = keras.Sequential()

    model.add(Input(shape=image_shape))

    # six hidden convolution blocks; each re-applies the cyclic padding so that
    # the 'valid' 3x3 convolution preserves the spatial dimensions
    for _ in range(6):
        model.add(padding.CyclicPadding2D(padding=(1, 1)))
        # model.add(keras.layers.ZeroPadding2D(padding=(1, 0)))
        model.add(Conv2D(filters=64, kernel_size=3, padding='valid', activation='relu'))

    # output: one channel per input field, linear activation
    model.add(padding.CyclicPadding2D(padding=(1, 1)))
    model.add(Conv2D(filters=image_shape[2], kernel_size=3, padding='valid', activation='linear'))
    return model
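A brief usage sketch (the grid size, optimiser, and loss are assumptions; the diff itself does not fix them):

    # sketch: build and compile the error-correction CNN
    import tensorflow as tf
    from model import kochkov_cnn  # assuming the module above is saved as model.py

    cnn = kochkov_cnn((256, 256, 2))  # (nx, nz, channels): vorticity and density
    cnn.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='mse')
    cnn.summary()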
@@ -0,0 +1,75 @@
import tensorflow as tf

# from keras.engine.base_layer import Layer
# from keras.engine.input_spec import InputSpec
# from keras.utils import conv_utils
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import InputSpec
from tensorflow.python.keras.utils import conv_utils

# some ideas here:
# https://stackoverflow.com/questions/54911015/keras-convolution-layer-on-images-coming-from-circular-cyclic-domain

class CyclicPadding2D(Layer):
    """Pads rows and columns by wrapping the tensor around cyclically."""

    def __init__(self, padding=(1, 1), data_format=None, **kwargs):
        super().__init__(**kwargs)
        self.data_format = conv_utils.normalize_data_format(data_format)
        if len(padding) != 2:
            raise ValueError('`padding` should have two elements. '
                             f'Received: {padding}.')
        self.padding = padding
        self.input_spec = InputSpec(ndim=4)

    def get_config(self):
        config = super().get_config()
        config.update({
            "padding": self.padding,
            "data_format": self.data_format,
        })
        return config

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        if self.data_format == 'channels_first':
            if input_shape[2] is not None:
                rows = input_shape[2] + 2 * self.padding[0]
            else:
                rows = None
            if input_shape[3] is not None:
                cols = input_shape[3] + 2 * self.padding[1]
            else:
                cols = None
            return tf.TensorShape(
                [input_shape[0], input_shape[1], rows, cols])
        elif self.data_format == 'channels_last':
            if input_shape[1] is not None:
                rows = input_shape[1] + 2 * self.padding[0]
            else:
                rows = None
            if input_shape[2] is not None:
                cols = input_shape[2] + 2 * self.padding[1]
            else:
                cols = None
            return tf.TensorShape([input_shape[0], rows, cols, input_shape[3]])

    def call(self, inputs):
        tensor = inputs
        ndim = len(inputs.shape)
        for ax, pd in enumerate(self.padding):
            if self.data_format == "channels_last":
                # (batch, rows, cols, channels)
                axis = 1 + ax
            elif self.data_format == "channels_first":
                # (batch, channels, rows, cols)
                axis = 2 + ax
            else:
                raise ValueError(f'Unknown data_format: {self.data_format}.')
            # wrap the last `pd` slices around to the front and vice versa
            sl_start = tuple(slice(None, pd) if i == axis else slice(None) for i in range(ndim))
            sl_end = tuple(slice(-pd, None) if i == axis else slice(None) for i in range(ndim))
            tensor = tf.concat([
                tensor[sl_end],
                tensor,
                tensor[sl_start],
            ], axis)

        return tensor
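A quick check of the wrap-around behaviour (a standalone sketch, assuming the layer above is importable from padding.py):

    # sketch: verify that CyclicPadding2D wraps rows and columns cyclically
    import numpy as np
    import tensorflow as tf
    from padding import CyclicPadding2D

    x = tf.reshape(tf.range(9, dtype=tf.float32), (1, 3, 3, 1))
    y = CyclicPadding2D(padding=(1, 1))(x)
    print(y.shape)  # (1, 5, 5, 1)
    # the first padded row (excluding corner columns) equals the last original row
    print(np.allclose(y[0, 0, 1:-1, 0], x[0, -1, :, 0]))  # True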
@@ -0,0 +1,23 @@
#!/bin/bash
#
#SBATCH --partition=gpu
#SBATCH --qos=gpu
#SBATCH --gres=gpu:1
#SBATCH --time=48:00:00
#SBATCH --account=x01

CUDA_VERSION=11.6
CUDNN_VERSION=8.6.0-cuda-${CUDA_VERSION}
TENSORRT_VERSION=8.4.3.1-u2

module load intel-20.4/compilers
module load nvidia/cudnn/${CUDNN_VERSION}
module load nvidia/tensorrt/${TENSORRT_VERSION}
module load nvidia/nvhpc

# initialise conda for batch shells, as in the data-generation script above
eval "$(/work/x01/x01/$USER/miniconda3/bin/conda shell.bash hook)"
conda activate boutsmartsim

cd /path/to/SiMLInt/files/training

# choose appropriate parameters here
python training.py --epochs 100 --batch-size 32 --learning-rate 0.0001
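training.py itself is not part of this diff; below is a minimal sketch of how it might wire the modules above together, assuming the file layout from gen_training_nc.py, module names data.py and model.py, and the flags used elsewhere in this commit (--epochs, --batch-size, --learning-rate, plus --vort-only/--dens-only from data.py):

    #!/usr/bin/env python
    # hypothetical training driver -- a sketch only; the real training.py is not shown
    import argparse
    import glob

    import tensorflow as tf

    import data   # assumed module names for the files above
    import model

    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--learning-rate', type=float, default=1e-4)
    parser.add_argument('--vort-only', action='store_true')
    parser.add_argument('--dens-only', action='store_true')
    args = parser.parse_args()

    # pair ground-truth and coarse-simulation trajectory files (paths assumed)
    gt_files = sorted(glob.glob('training_nc/gt_traj_*.nc'))
    cg_files = sorted(glob.glob('training_nc/sim_traj_*.nc'))

    dataset = data.generate_augmented_dataset(gt_files, cg_files, args)
    dataset = dataset.map(lambda d: (d['coarse'], d['error'])).batch(args.batch_size)

    channels = 1 if (args.vort_only or args.dens_only) else 2
    cnn = model.kochkov_cnn((None, None, channels))  # fully convolutional: any grid size
    cnn.compile(optimizer=tf.keras.optimizers.Adam(args.learning_rate), loss='mse')
    cnn.fit(dataset, epochs=args.epochs)
    cnn.save('error_correction_model')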