ML Floating-Point Formats in HDF5

HDF5 Datatype Definition

Custom HDF5 floating-point datatype

Library

Header

#ifndef FMB_H
#define FMB_H

#include "hdf5.h"

<<fp16-head>>
<<bfloat16-head>>
<<tf32-head>>
<<fp24-head>>

/*
 * Create a custom HDF5 floating-point datatype.
 *
 * The implementation copies the predefined IEEE 64-bit float type of the
 * requested byte order and then applies the remaining arguments through the
 * corresponding H5Tset_* calls.
 *
 *   bytes       total size, passed to H5Tset_size
 *   byte_order  H5T_ORDER_LE selects the little-endian base type; any other
 *               value selects the big-endian base type
 *   precision   passed to H5Tset_precision
 *   offset      passed to H5Tset_offset
 *   spos        sign position, passed to H5Tset_fields
 *   epos, esize exponent position and size, passed to H5Tset_fields
 *   ebias       exponent bias, passed to H5Tset_ebias
 *   mpos, msize mantissa position and size, passed to H5Tset_fields
 *   norm        mantissa normalization, passed to H5Tset_norm
 *   inpad       internal padding, passed to H5Tset_inpad
 *   lsb, msb    padding arguments, passed to H5Tset_pad
 *
 * Returns the identifier of the new datatype. The sample program releases
 * each identifier with H5Tclose once it is done with it.
 */
extern hid_t make_fp_datatype
(
 size_t      bytes,
 H5T_order_t byte_order,
 size_t      precision,
 size_t      offset,
 size_t      spos,
 size_t      epos,
 size_t      esize,
 size_t      ebias,
 size_t      mpos,
 size_t      msize,
 H5T_norm_t  norm,
 H5T_pad_t   inpad,
 H5T_pad_t   lsb,
 H5T_pad_t   msb
 );

#endif /* FMB_H */

Functions

#include <assert.h>

/*
 * Create a custom HDF5 floating-point datatype.
 *
 * Starts from a copy of the predefined IEEE 64-bit float type matching
 * `byte_order` (little-endian for H5T_ORDER_LE, big-endian otherwise) and
 * reshapes it with the H5Tset_* calls below, in this order: offset, fields,
 * exponent bias, normalization, internal padding, LSB/MSB padding,
 * precision, size.
 *
 * Returns the identifier of the new datatype; the caller releases it with
 * H5Tclose. In builds with assertions enabled any failing HDF5 call aborts
 * the program. With NDEBUG defined a failure instead closes the partially
 * configured type and returns a negative identifier.
 */
hid_t make_fp_datatype
(
 size_t      bytes,
 H5T_order_t byte_order,
 size_t      precision,
 size_t      offset,
 size_t      spos,
 size_t      epos,
 size_t      esize,
 size_t      ebias,
 size_t      mpos,
 size_t      msize,
 H5T_norm_t  norm,
 H5T_pad_t   inpad,
 H5T_pad_t   lsb,
 H5T_pad_t   msb
 )
{
  int ok;
  hid_t result = (byte_order == H5T_ORDER_LE) ?
    H5Tcopy(H5T_IEEE_F64LE) : H5Tcopy(H5T_IEEE_F64BE);

  assert(result >= 0);
  if (result < 0)
    return result;

  /*
   * The HDF5 calls must stay OUTSIDE assert(): with NDEBUG defined the
   * assert argument is not evaluated, which would silently skip every
   * setter and hand back an unmodified 64-bit float type.
   * The `ok &&` chain stops issuing calls after the first failure.
   */
  ok = (H5Tset_offset(result, offset) >= 0);
  assert(ok);
  ok = ok && (H5Tset_fields(result, spos, epos, esize, mpos, msize) >= 0);
  assert(ok);
  ok = ok && (H5Tset_ebias(result, ebias) >= 0);
  assert(ok);
  ok = ok && (H5Tset_norm(result, norm) >= 0);
  assert(ok);
  ok = ok && (H5Tset_inpad(result, inpad) >= 0);
  assert(ok);
  ok = ok && (H5Tset_pad(result, lsb, msb) >= 0);
  assert(ok);
  ok = ok && (H5Tset_precision(result, precision) >= 0);
  assert(ok);
  ok = ok && (H5Tset_size(result, bytes) >= 0);
  assert(ok);

  if (!ok) {
    /* Only reachable with NDEBUG: do not leak the half-configured type. */
    H5Tclose(result);
    return (hid_t)-1;
  }

  return result;
}

Add more sanity checks

A simple test

#include "fmb.h"

<<fmb-tail>>

<<fp16-tail>>
<<bfloat16-tail>>
<<tf32-tail>>
<<fp24-tail>>

/*
 * Commit `dtype` to `file` under the link name `name`, then close the
 * datatype handle. The handle is closed even when the commit fails.
 * Returns 0 on success and -1 if the handle is invalid or any HDF5 call
 * reports an error.
 */
static int commit_and_close(hid_t file, const char* name, hid_t dtype)
{
  int status = 0;

  if (dtype < 0)
    return -1;

  if (H5Tcommit(file, name, dtype, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT) < 0)
    status = -1;
  if (H5Tclose(dtype) < 0)
    status = -1;

  return status;
}

/*
 * Create "float_my_boat.h5" (truncating any existing file) and store the
 * four custom floating-point datatypes in it as committed datatypes.
 * Exits with 0 when everything succeeded and 1 otherwise; a failure for
 * one datatype does not prevent the remaining ones from being attempted.
 */
int main(int argc, char** argv)
{
  int status = 0;
  hid_t file = H5Fcreate("float_my_boat.h5", H5F_ACC_TRUNC,
                         H5P_DEFAULT, H5P_DEFAULT);

  (void) argc;
  (void) argv;

  if (file < 0)
    return 1;

  if (commit_and_close(file, "fp16", make_fp16()) < 0)
    status = 1;
  if (commit_and_close(file, "bfloat16", make_bfloat16()) < 0)
    status = 1;
  if (commit_and_close(file, "tf32", make_tf32()) < 0)
    status = 1;
  if (commit_and_close(file, "fp24", make_fp24()) < 0)
    status = 1;

  if (H5Fclose(file) < 0)
    status = 1;

  return status;
}

h5ls -v float_my_boat.h5

h5dump float_my_boat.h5

Unit Testing

How do we test all this?

Create a sample HDF5 file with datasets for each type

Create a 1024 element dataset with [FP_MIN, -511.0, …, 0.0, …, 510.0, FP_MAX]
Create a dataset where we check the mantissa range
Create a dataset where we check the exponent range
Create a dataset where we check the NaN behavior
Create a dataset where we check the rounding properties
…

Include the corresponding MIN and MAX values

Conversion

Find conversion libraries

Check the availability of conversions on devices

Other

NVIDIA GPU Direct storage access to PCIe attached devices
How do we do that w/ HDF5?
- Pass the dataset/chunk addresses to the GPU
  - Make sure that the chunks have been allocated!

References

Floating-Point Formats and Deep Learning
bfloat16 floating-point format

Appendix

Floating-point Flavors

IEEE FP16

/*
 * IEEE FP16 layout parameters, in the order expected by make_fp_datatype:
 * 1 sign bit, 5 exponent bits and 10 mantissa bits in 2 bytes.
 */
#define FP16_BYTES  2                /* total size in bytes */
#define FP16_ORDER  H5T_ORDER_LE     /* byte order */
#define FP16_PREC   16               /* precision in bits */
#define FP16_OFFSET 0                /* bit offset */
#define FP16_SPOS   15               /* sign bit position */
#define FP16_EPOS   10               /* exponent position */
#define FP16_ESIZE  5                /* exponent size in bits */
#define FP16_EBIAS  15               /* exponent bias */
#define FP16_MPOS   0                /* mantissa position */
#define FP16_MSIZE  10               /* mantissa size in bits */
#define FP16_NORM   H5T_NORM_MSBSET  /* mantissa normalization */
#define FP16_INPAD  H5T_PAD_ZERO     /* internal padding */
#define FP16_LSB    H5T_PAD_ZERO     /* least-significant-bit padding */
#define FP16_MSB    H5T_PAD_ZERO     /* most-significant-bit padding */

/*
 * Returns a new HDF5 datatype built from the FP16_* parameters.
 * Declared with (void) so the declaration is a real prototype.
 */
extern hid_t make_fp16(void);

hid_t make_fp16()
{
  return make_fp_datatype(FP16_BYTES, FP16_ORDER, FP16_PREC,
                          FP16_ORDER, FP16_SPOS, FP16_EPOS,
                          FP16_ESIZE, FP16_EBIAS, FP16_MPOS,
                          FP16_MSIZE, FP16_NORM, FP16_INPAD,
                          FP16_LSB, FP16_MSB);
}

Google BFloat16

/*
 * Google BFloat16 layout parameters, in the order expected by
 * make_fp_datatype: 1 sign bit, 8 exponent bits and 7 mantissa bits in
 * 2 bytes.
 */
#define BFLOAT16_BYTES  2                /* total size in bytes */
#define BFLOAT16_ORDER  H5T_ORDER_LE     /* byte order */
#define BFLOAT16_PREC   16               /* precision in bits */
#define BFLOAT16_OFFSET 0                /* bit offset */
#define BFLOAT16_SPOS   15               /* sign bit position */
#define BFLOAT16_EPOS   7                /* exponent position */
#define BFLOAT16_ESIZE  8                /* exponent size in bits */
#define BFLOAT16_EBIAS  127              /* exponent bias */
#define BFLOAT16_MPOS   0                /* mantissa position */
#define BFLOAT16_MSIZE  7                /* mantissa size in bits */
#define BFLOAT16_NORM   H5T_NORM_MSBSET  /* mantissa normalization */
#define BFLOAT16_INPAD  H5T_PAD_ZERO     /* internal padding */
#define BFLOAT16_LSB    H5T_PAD_ZERO     /* least-significant-bit padding */
#define BFLOAT16_MSB    H5T_PAD_ZERO     /* most-significant-bit padding */

/*
 * Returns a new HDF5 datatype built from the BFLOAT16_* parameters.
 * Declared with (void) so the declaration is a real prototype.
 */
extern hid_t make_bfloat16(void);

hid_t make_bfloat16()
{
  return make_fp_datatype(BFLOAT16_BYTES, BFLOAT16_ORDER, BFLOAT16_PREC,
                          BFLOAT16_ORDER, BFLOAT16_SPOS, BFLOAT16_EPOS,
                          BFLOAT16_ESIZE, BFLOAT16_EBIAS, BFLOAT16_MPOS,
                          BFLOAT16_MSIZE, BFLOAT16_NORM, BFLOAT16_INPAD,
                          BFLOAT16_LSB, BFLOAT16_MSB);
}

NVIDIA TensorFloat

/*
 * NVIDIA TensorFloat layout parameters, in the order expected by
 * make_fp_datatype: 1 sign bit, 8 exponent bits and 10 mantissa bits
 * (19 bits of precision) stored in 3 bytes.
 */
#define TF32_BYTES  3                /* total size in bytes */
#define TF32_ORDER  H5T_ORDER_LE     /* byte order */
#define TF32_PREC   19               /* precision in bits */
#define TF32_OFFSET 0                /* bit offset */
#define TF32_SPOS   18               /* sign bit position */
#define TF32_EPOS   10               /* exponent position */
#define TF32_ESIZE  8                /* exponent size in bits */
#define TF32_EBIAS  127              /* exponent bias */
#define TF32_MPOS   0                /* mantissa position */
#define TF32_MSIZE  10               /* mantissa size in bits */
#define TF32_NORM   H5T_NORM_MSBSET  /* mantissa normalization */
#define TF32_INPAD  H5T_PAD_ZERO     /* internal padding */
#define TF32_LSB    H5T_PAD_ZERO     /* least-significant-bit padding */
#define TF32_MSB    H5T_PAD_ZERO     /* most-significant-bit padding */

/*
 * Returns a new HDF5 datatype built from the TF32_* parameters.
 * Declared with (void) so the declaration is a real prototype.
 */
extern hid_t make_tf32(void);

hid_t make_tf32()
{
  return make_fp_datatype(TF32_BYTES, TF32_ORDER, TF32_PREC,
                          TF32_ORDER, TF32_SPOS, TF32_EPOS,
                          TF32_ESIZE, TF32_EBIAS, TF32_MPOS,
                          TF32_MSIZE, TF32_NORM, TF32_INPAD,
                          TF32_LSB, TF32_MSB);
}

Can we pack these closer together?

AMD FP24

/*
 * AMD FP24 layout parameters, in the order expected by make_fp_datatype:
 * 1 sign bit, 7 exponent bits and 16 mantissa bits in 3 bytes.
 */
#define FP24_BYTES  3                /* total size in bytes */
#define FP24_ORDER  H5T_ORDER_LE     /* byte order */
#define FP24_PREC   24               /* precision in bits */
#define FP24_OFFSET 0                /* bit offset */
#define FP24_SPOS   23               /* sign bit position */
#define FP24_EPOS   16               /* exponent position */
#define FP24_ESIZE  7                /* exponent size in bits */
#define FP24_EBIAS  63               /* exponent bias */
#define FP24_MPOS   0                /* mantissa position */
#define FP24_MSIZE  16               /* mantissa size in bits */
#define FP24_NORM   H5T_NORM_MSBSET  /* mantissa normalization */
#define FP24_INPAD  H5T_PAD_ZERO     /* internal padding */
#define FP24_LSB    H5T_PAD_ZERO     /* least-significant-bit padding */
#define FP24_MSB    H5T_PAD_ZERO     /* most-significant-bit padding */

/*
 * Returns a new HDF5 datatype built from the FP24_* parameters.
 * Declared with (void) so the declaration is a real prototype.
 */
extern hid_t make_fp24(void);

hid_t make_fp24()
{
  return make_fp_datatype(FP24_BYTES, FP24_ORDER, FP24_PREC,
                          FP24_ORDER, FP24_SPOS, FP24_EPOS,
                          FP24_ESIZE, FP24_EBIAS, FP24_MPOS,
                          FP24_MSIZE, FP24_NORM, FP24_INPAD,
                          FP24_LSB, FP24_MSB);
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

float-my-boat.org

float-my-boat.org

ML Floating-Point Formats in HDF5

HDF5 Datatype Definition

Custom HDF5 floating-point datatype

Library

Header

Functions

Add more sanity checks

A simple test

Unit Testing

Create a sample HDF5 file with datasets for each type

Include the corresponding MIN and MAX values

Conversion

Find conversion libraries

Check the availability of conversions on devices

Other

References

Appendix

Floating-point Flavors

IEEE FP16

Google BFloat16

NVIDIA TensorFloat

Can we pack these closer together?

AMD FP24

Consider n-bit filter?

Files

float-my-boat.org

Latest commit

History

float-my-boat.org

File metadata and controls

ML Floating-Point Formats in HDF5

HDF5 Datatype Definition

Custom HDF5 floating-point datatype

Library

Header

Functions

Add more sanity checks

A simple test

Unit Testing

Create a sample HDF5 file with datasets for each type

Include the corresponding MIN and MAX values

Conversion

Find conversion libraries

Check the availability of conversions on devices

Other

References

Appendix

Floating-point Flavors

IEEE FP16

Google BFloat16

NVIDIA TensorFloat

Can we pack these closer together?

AMD FP24

Consider n-bit filter?