Commit

hoge
bokutotu committed Jun 11, 2024
1 parent b68f1b5 commit 507b0c0
Showing 4 changed files with 121 additions and 40 deletions.
2 changes: 1 addition & 1 deletion zenu-cuda/src/cudnn/batch_norm.rs
@@ -670,7 +670,7 @@ mod batch_norm {
bias_gpu,
running_mean_gpu,
running_variance_gpu,
0.1,
1.0,
saved_mean_gpu,
saved_variance_gpu,
)
2 changes: 1 addition & 1 deletion zenu-matrix/src/device/cpu.rs
@@ -1,6 +1,6 @@
use super::{Device, DeviceBase};

#[derive(Copy, Clone, Default)]
#[derive(Copy, Clone, Default, Debug)]
pub struct Cpu;

impl DeviceBase for Cpu {
2 changes: 1 addition & 1 deletion zenu-matrix/src/device/nvidia.rs
@@ -1,7 +1,7 @@
use super::{Device, DeviceBase};
use crate::num::Num;

#[derive(Copy, Clone, Default)]
#[derive(Copy, Clone, Default, Debug)]
pub struct Nvidia;

impl DeviceBase for Nvidia {
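The Debug derives added to Cpu and Nvidia above are most likely what allows the new test code in zenu-matrix/src/nn/batch_norm.rs below to derive Debug on BatchNormInputs<D: Device> and print matrices with {:?}: a derived Debug on a generic type only applies when every type parameter is itself Debug. A minimal standalone illustration of that rule (not zenu code, names are placeholders):

    use std::fmt::Debug;

    // Stand-in for the real device marker types; only the derive behaviour matters here.
    #[derive(Debug, Default, Copy, Clone)]
    struct Cpu;

    // The derived impl is effectively `impl<D: Debug> Debug for Holder<D>`,
    // so printing Holder<Cpu> with {:?} requires Cpu: Debug.
    #[derive(Debug)]
    struct Holder<D> {
        device: D,
        data: Vec<f32>,
    }

    fn main() {
        let h = Holder { device: Cpu, data: vec![1.0, 2.0] };
        println!("{h:?}");
    }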
155 changes: 118 additions & 37 deletions zenu-matrix/src/nn/batch_norm.rs
@@ -13,7 +13,7 @@ use crate::device::nvidia::Nvidia;

#[cfg(feature = "nvidia")]
fn batch_norm2d_forward_train_gpu<T: Num>(
momentum: T,
momentum: f64,
x: Matrix<Ref<&T>, DimDyn, Nvidia>,
y: Matrix<Ref<&mut T>, DimDyn, Nvidia>,
scale: Matrix<Ref<&T>, DimDyn, Nvidia>,
@@ -22,23 +22,21 @@ fn batch_norm2d_forward_train_gpu<T: Num>(
variance: Matrix<Ref<&mut T>, DimDyn, Nvidia>,
saving_mean: Matrix<Ref<&mut T>, DimDyn, Nvidia>,
saving_inv_variance: Matrix<Ref<&mut T>, DimDyn, Nvidia>,
epsilon: f64,
batch_norm: Option<BatchNorm2d<T>>,
) {
let alpha = T::one() - momentum;
let beta = momentum;
let momentum = 1. - momentum;
match batch_norm {
Some(batch_norm) => batch_norm
.forward_train(
alpha,
beta,
T::one(),
T::zero(),
x.as_ptr(),
y.as_mut_ptr(),
scale.as_ptr(),
bias.as_ptr(),
mean.as_mut_ptr(),
variance.as_mut_ptr(),
epsilon,
momentum,
saving_mean.as_mut_ptr(),
saving_inv_variance.as_mut_ptr(),
)
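The `let momentum = 1. - momentum;` line reconciles two running-statistic conventions: the CPU path further down weights the old running value by momentum (mean_t = x_mean * (1 - momentum) + mean * momentum), whereas cuDNN's exponential average factor weights the new batch statistic. A small standalone check of the equivalence (illustrative values only, not zenu code):

    fn main() {
        let (running, batch) = (10.0_f64, 2.0_f64);
        let momentum = 0.75; // zenu convention: weight kept on the old running value

        // CPU path in this diff: new = batch * (1 - momentum) + running * momentum
        let cpu = batch * (1.0 - momentum) + running * momentum;

        // cuDNN-style update with factor = 1 - momentum:
        // new = running * (1 - factor) + batch * factor
        let factor = 1.0 - momentum;
        let cudnn = running * (1.0 - factor) + batch * factor;

        assert_eq!(cpu, cudnn); // both 8.0
        println!("{cpu} == {cudnn}");
    }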
@@ -115,7 +113,7 @@ fn create_batch_norm_backward_gpu<T: Num>(
let batch_norm_backward = BatchNorm2dBackwardBuilder::<T>::new()
.input(input.0, input.1, input.2, input.3, TensorFormat::NCHW)
.unwrap()
.output(input.0, input.1, input.2, input.3, TensorFormat::NCHW)
.output_grad(input.0, input.1, input.2, input.3, TensorFormat::NCHW)
.unwrap()
.scale_bias_mean_var(input.1, TensorFormat::NCHW)
.unwrap()
@@ -131,39 +129,88 @@ fn batch_norm2d_forward_train_cpu<T: Num>(
bias: Matrix<Ref<&T>, DimDyn, Cpu>,
mean: Matrix<Ref<&mut T>, DimDyn, Cpu>,
variance: Matrix<Ref<&mut T>, DimDyn, Cpu>,
epsilon: f64,
saving_mean: Matrix<Ref<&mut T>, DimDyn, Cpu>,
saving_inv_variance: Matrix<Ref<&mut T>, DimDyn, Cpu>,
) {
let epsilon = T::from_f64(epsilon);
let epsilon = T::from_f64(1e-10);
let x_transposed = x.transpose_by_index_new_matrix(&[0, 2, 3, 1]);
let x_reshaped = x_transposed.reshape(&[
x_transposed.shape()[0] * x_transposed.shape()[2] * x_transposed.shape()[3],
x_transposed.shape()[1],
]);

let num_elements = T::from_usize(x_reshaped.shape()[0]); // get the number of rows
let num_elements = T::from_usize(x_reshaped.shape()[0]);

let x_mean = x_reshaped.mean(Some(0), false);
let x_diff = &x_reshaped - &x_mean;
let x_diff_squared = &x_diff * &x_diff;
let x_variance = x_diff_squared.mean(Some(0), false) * num_elements / (num_elements - T::one());

let inv_std = Matrix::<_, DimDyn, _>::ones(x_variance.shape()) / (x_variance.sqrt() + epsilon);
let x_hat = &x_diff * &inv_std;
let y_hat = x_hat * scale + bias;
let y_reshaped = y_hat.reshape(&[x.shape()[0], x.shape()[2], x.shape()[3], x.shape()[1]]);
let y_transposed = y_reshaped.transpose_by_index_new_matrix(&[0, 3, 1, 2]);
y.copy_from(&y_transposed);

let mean_t = &x_mean * (T::one() - momentum) + &mean * momentum;
let variance_t = x_variance * (T::one() - momentum) + &variance * momentum;

let inv_var = Matrix::<_, DimDyn, _>::ones(variance_t.shape()) / (&variance_t + epsilon);
let inv_std = inv_var.sqrt();

mean.copy_from(&mean_t);
variance.copy_from(&variance_t);

saving_mean.copy_from(&x_mean);
saving_inv_variance.copy_from(&inv_std);

let x_normalized = &x_diff * &inv_std;
let y_tmp = &x_normalized * &scale + &bias;
let y_transposed = y_tmp.reshape(&[
x_transposed.shape()[0],
x_transposed.shape()[2],
x_transposed.shape()[3],
x_transposed.shape()[1],
]);
y.copy_from(&y_transposed.transpose_by_index_new_matrix(&[0, 3, 1, 2]));
}
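Reading the new CPU path above at face value (m = N·H·W samples per channel, ε the hard-coded 1e-10), the per-channel computation is, as a sketch in standard notation:

    \mu_c = \frac{1}{m}\sum_{n,h,w} x_{n,c,h,w}, \qquad
    \sigma_c^2 = \frac{m}{m-1}\cdot\frac{1}{m}\sum_{n,h,w}\bigl(x_{n,c,h,w}-\mu_c\bigr)^2

    \mathrm{mean}_c \leftarrow (1-\mathrm{momentum})\,\mu_c + \mathrm{momentum}\cdot\mathrm{mean}_c, \qquad
    \mathrm{var}_c \leftarrow (1-\mathrm{momentum})\,\sigma_c^2 + \mathrm{momentum}\cdot\mathrm{var}_c

    \mathrm{inv\_std}_c = \frac{1}{\sqrt{\mathrm{var}_c+\varepsilon}}, \qquad
    y_{n,c,h,w} = \gamma_c\,(x_{n,c,h,w}-\mu_c)\,\mathrm{inv\_std}_c + \beta_c

Note that, as written, the normalisation uses the freshly updated running variance var_c rather than the raw batch variance; with momentum = 0 (as in the small_cpu test below) the two are identical.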

fn batch_norm2d_backward_cpu<T: Num>(
momentum: T,
x: Matrix<Ref<&T>, DimDyn, Cpu>,
x_grad: Matrix<Ref<&mut T>, DimDyn, Cpu>,
y_grad: Matrix<Ref<&T>, DimDyn, Cpu>,
scale: Matrix<Ref<&T>, DimDyn, Cpu>,
scale_grad: Matrix<Ref<&mut T>, DimDyn, Cpu>,
bias_grad: Matrix<Ref<&mut T>, DimDyn, Cpu>,
epsilon: f64,
saving_mean: Matrix<Ref<&T>, DimDyn, Cpu>,
saving_inv_variance: Matrix<Ref<&T>, DimDyn, Cpu>,
) {
let epsilon = T::from_f64(1e-10);
let batch_size = T::from_usize(x.shape()[0]);

let x_transposed = x.transpose_by_index_new_matrix(&[0, 2, 3, 1]);
let x_reshaped = x_transposed.reshape(&[
x_transposed.shape()[0] * x_transposed.shape()[2] * x_transposed.shape()[3],
x_transposed.shape()[1],
]);

let y_grad_transposed = y_grad.transpose_by_index_new_matrix(&[0, 2, 3, 1]);
let y_grad_reshaped = y_grad_transposed.reshape(&[
y_grad_transposed.shape()[0] * y_grad_transposed.shape()[2] * y_grad_transposed.shape()[3],
y_grad_transposed.shape()[1],
]);

let xc = (&x_reshaped - &saving_mean) * &saving_inv_variance;

bias_grad.copy_from(&y_grad_transposed.to_ref().sum(0, false));
scale_grad.copy_from(&(&xc * &y_grad_reshaped).to_ref().sum(0, false));

let tmp_x_grad = &y_grad_reshaped / batch_size - &xc * &scale_grad / batch_size;
let tmp_x_grad = &tmp_x_grad * &saving_inv_variance;

let x_grad_transposed = tmp_x_grad.reshape(&[
x_transposed.shape()[0],
x_transposed.shape()[2],
x_transposed.shape()[3],
x_transposed.shape()[1],
]);
x_grad.copy_from(&x_grad_transposed.transpose_by_index_new_matrix(&[0, 3, 1, 2]));
}
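For comparison, the textbook per-channel backward pass, with x̂ the normalised input, σ_c = sqrt(σ_c² + ε) and m = N·H·W, is:

    \frac{\partial L}{\partial \beta_c} = \sum_{n,h,w}\delta y, \qquad
    \frac{\partial L}{\partial \gamma_c} = \sum_{n,h,w}\hat{x}\,\delta y, \qquad
    \frac{\partial L}{\partial x} = \frac{\gamma_c}{m\,\sigma_c}\Bigl(m\,\delta y - \frac{\partial L}{\partial \beta_c} - \hat{x}\,\frac{\partial L}{\partial \gamma_c}\Bigr)

The code above divides by x.shape()[0] (the batch dimension alone), whereas the textbook formula normalises over all m = N·H·W positions per channel.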

pub trait BatchNormalization: Device {
@@ -175,7 +222,6 @@ pub trait BatchNormalization: Device {
bias: Matrix<Ref<&T>, DimDyn, Self>,
mean: Matrix<Ref<&mut T>, DimDyn, Self>,
variance: Matrix<Ref<&mut T>, DimDyn, Self>,
epsilon: f64,
saving_mean: Matrix<Ref<&mut T>, DimDyn, Self>,
saving_inv_variance: Matrix<Ref<&mut T>, DimDyn, Self>,
device_batch_norm: Option<B>,
@@ -204,7 +250,6 @@ impl BatchNormalization for Cpu {
bias: Matrix<Ref<&T>, DimDyn, Self>,
mean: Matrix<Ref<&mut T>, DimDyn, Self>,
variance: Matrix<Ref<&mut T>, DimDyn, Self>,
epsilon: f64,
saving_mean: Matrix<Ref<&mut T>, DimDyn, Self>,
saving_inv_variance: Matrix<Ref<&mut T>, DimDyn, Self>,
_: Option<B>,
@@ -217,7 +262,6 @@ impl BatchNormalization for Cpu {
bias,
mean,
variance,
epsilon,
saving_mean,
saving_inv_variance,
);
@@ -255,42 +299,69 @@ mod batch_norm {
#[cfg(feature = "nvidia")]
use crate::device::nvidia::Nvidia;

#[derive(Debug)]
struct BatchNormInputs<D: Device> {
x: Matrix<Owned<f32>, DimDyn, D>,
y: Matrix<Owned<f32>, DimDyn, D>,
scale: Matrix<Owned<f32>, DimDyn, D>,
bias: Matrix<Owned<f32>, DimDyn, D>,
mean: Matrix<Owned<f32>, DimDyn, D>,
variance: Matrix<Owned<f32>, DimDyn, D>,
saved_mean: Matrix<Owned<f32>, DimDyn, D>,
saved_variance: Matrix<Owned<f32>, DimDyn, D>,
}

fn small_data<D: Device>() -> BatchNormInputs<D> {
let x = Matrix::<Owned<f32>, DimDyn, D>::from_vec(
vec![
0., 1., 2., 3., 4., 5., 6., 7., 0., 1., 2., 3., 4., 5., 6., 7.,
// 0., 1., 2., 3., 4., 5., 6., 7., 0., 1., 2., 3., 4., 5., 6., 7.,
-1.1258398,
-1.1523602,
-0.25057858,
-0.4338788,
0.84871036,
0.69200915,
-0.31601277,
-2.1152194,
0.32227492,
-1.2633348,
0.3499832,
0.30813393,
0.11984151,
1.2376579,
1.1167772,
-0.24727815,
],
&[2, 2, 2, 2],
);
let y = Matrix::<Owned<f32>, DimDyn, D>::zeros(x.shape());
let scale = Matrix::<Owned<f32>, DimDyn, D>::from_vec(vec![1., 1.], [2]);
let bias = Matrix::<Owned<f32>, DimDyn, D>::from_vec(vec![0., 0.], [2]);
let mean = Matrix::<Owned<f32>, DimDyn, D>::zeros([2]);
let variance = Matrix::<Owned<f32>, DimDyn, D>::zeros([2]);
let running_mean = vec![-0.04057, 0.01670607];
let running_variance = vec![0.9492437, 1.0200632];
let saved_mean = vec![-0.04057, 0.01670607];
let saved_variance = vec![0.9492437, 1.0200632];
let scale = vec![1.0, 1.0];
let bias = vec![0.0, 0.0];
let y = Matrix::<Owned<f32>, DimDyn, D>::zeros(&[2, 2, 2, 2]);
let mean = Matrix::<Owned<f32>, DimDyn, D>::from_vec(running_mean, &[2]);
let variance = Matrix::<Owned<f32>, DimDyn, D>::from_vec(running_variance, &[2]);
let scale = Matrix::<Owned<f32>, DimDyn, D>::from_vec(scale, &[2]);
let bias = Matrix::<Owned<f32>, DimDyn, D>::from_vec(bias, &[2]);
let saved_mean = Matrix::<Owned<f32>, DimDyn, D>::from_vec(saved_mean, &[2]);
let saved_variance = Matrix::<Owned<f32>, DimDyn, D>::from_vec(saved_variance, &[2]);
BatchNormInputs {
x,
y,
scale,
bias,
mean,
variance,
saved_mean,
saved_variance,
}
}
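The fixture's running/saved statistics can be sanity-checked by hand: the per-channel batch means of the x tensor are about -0.4057 and 0.1671, and scaling them by a 0.1 momentum factor (starting from zero-initialised running statistics) reproduces the listed -0.04057 and 0.01670607, which suggests the reference values were generated with momentum 0.1. A standalone check of the means (not part of the crate):

    fn main() {
        // small_data fixture, NCHW layout [2, 2, 2, 2]
        let x = [
            -1.1258398f64, -1.1523602, -0.25057858, -0.4338788,
            0.84871036, 0.69200915, -0.31601277, -2.1152194,
            0.32227492, -1.2633348, 0.3499832, 0.30813393,
            0.11984151, 1.2376579, 1.1167772, -0.24727815,
        ];
        let (n, c, h, w) = (2usize, 2usize, 2usize, 2usize);
        for ch in 0..c {
            let mut sum = 0.0;
            for b in 0..n {
                for i in 0..h * w {
                    sum += x[b * c * h * w + ch * h * w + i];
                }
            }
            // prints roughly -0.40570 and 0.16706
            println!("channel {ch}: batch mean = {:.5}", sum / (n * h * w) as f64);
        }
    }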

#[test]
fn small_cpu() {
let mut inputs = small_data::<Cpu>();
let mut savig_mean = Matrix::<Owned<f32>, DimDyn, Cpu>::zeros(&[2]);
let mut saving_inv_variance = Matrix::<Owned<f32>, DimDyn, Cpu>::zeros(&[2]);
batch_norm2d_forward_train_cpu(
0.0,
inputs.x.to_ref(),
@@ -299,18 +370,22 @@ mod batch_norm {
inputs.bias.to_ref(),
inputs.mean.to_ref_mut(),
inputs.variance.to_ref_mut(),
1e-5,
savig_mean.to_ref_mut(),
saving_inv_variance.to_ref_mut(),
inputs.saved_mean.to_ref_mut(),
inputs.saved_variance.to_ref_mut(),
);

println!("y {:?}", inputs.y);
println!("mean {:?}", inputs.mean);
println!("variance {:?}", inputs.variance);
println!("saved mean {:?}", inputs.saved_mean);
println!("saved variance {:?}", inputs.saved_variance);
panic!();
}

#[cfg(feature = "nvidia")]
#[test]
fn small_gpu() {
let mut inputs = small_data::<Nvidia>();
let mut savig_mean = Matrix::<Owned<f32>, DimDyn, Nvidia>::zeros(&[2]);
let mut saving_inv_variance = Matrix::<Owned<f32>, DimDyn, Nvidia>::zeros(&[2]);
let batch_norm = BatchNorm2dBuilder::<f32>::new()
.input(2, 2, 2, 2, TensorFormat::NCHW)
.unwrap()
@@ -328,10 +403,16 @@ mod batch_norm {
inputs.bias.to_ref(),
inputs.mean.to_ref_mut(),
inputs.variance.to_ref_mut(),
savig_mean.to_ref_mut(),
saving_inv_variance.to_ref_mut(),
1.,
inputs.saved_mean.to_ref_mut(),
inputs.saved_variance.to_ref_mut(),
Some(batch_norm),
);

println!("y {:?}", inputs.y);
println!("mean {:?}", inputs.mean);
println!("variance {:?}", inputs.variance);
println!("saved mean {:?}", inputs.saved_mean);
println!("saved variance {:?}", inputs.saved_variance);
panic!();
}
}
