Skip to content
/ zarr-ml Public

An implementation of the Zarr storage format specification for chunked & compressed multidimensional arrays.

License

Notifications You must be signed in to change notification settings

zoj613/zarr-ml

Repository files navigation

codecov CI license

zarr-ml

This library provides an OCaml implementation of the Zarr version 3 storage format specification for chunked & compressed multi-dimensional arrays, designed for use in parallel computing.

Features

  • Supports creating n-dimensional Zarr arrays and chunking them along any dimension.
  • Compresses chunks using a variety of supported compression codecs.
  • Supports indexing operations to read/write views of a Zarr array.
  • Supports storing arrays in-memory or the local filesystem. It is also extensible, allowing users to easily create and use their own custom storage backends. See the example implementing a Zip file store for more details.
  • Supports both synchronous and asynchronous I/O via Lwt and Eio. The user can easily use their own scheduler of choice. See the example implementing a filesystem store that uses the Picos concurrency library for non-blocking I/O.
  • Leverages the strong type system of OCaml to create a type-safe API, making it impossible to create, read or write malformed arrays.
  • Supports organizing arrays into hierarchies via groups.

Documentation

API documentation can be found here. The full specification of the storage format can be found there.

Installation

The library comes in several flavors depending on the synchronous/asynchronous backend of choice. To install the synchronous API, use

$ opam install zarr-sync

To install zarr with an asynchronous API powered by Lwt or Eio, use

$ opam install zarr-lwt
$ opam install zarr-eio

To install the development version using the latest git commit, do

# for zarr-sync
 opam pin add zarr-sync git+https://github.com/zoj613/zarr-ml 
# for zarr-lwt
 opam pin add zarr-lwt git+https://github.com/zoj613/zarr-ml 
# for zarr-eio
 opam pin add zarr-eio git+https://github.com/zoj613/zarr-ml 

Quick start

Below is a demonstration of the library's API for synchronous reads/writes. A similar example using the Lwt-backed asynchronous API can be found here.

setup

open Zarr
open Zarr.Codecs
open Zarr.Indexing
open Zarr_sync.Storage
open IO.Infix  (* opens infix operators >>= and >>| for monadic bind & map *)

(* The store handle used by every example below. The string argument is
   presumably the store's root directory on disk -- confirm in the API docs. *)
let store = FilesystemStore.create "testdata.zarr";;

create group

(* Build a group node value from the hierarchy path "/some/group". *)
let group_node = Node.Group.of_path "/some/group";;
(* Create the group described by [group_node] inside [store]. *)
FilesystemStore.Group.create store group_node;;

create an array

(* Array node named "name" placed under [group_node]; the hierarchy listing
   later in this walkthrough reports its path as "/some/group/name". *)
let array_node = Node.Array.(group_node / "name");;
(* creates an array with char data type and fill value '?' *)
(* [~codecs] is the codec chain applied to each chunk: a transpose with the
   permutation [|2; 0; 1|], a bytes codec tagged [BE] (presumably big-endian),
   and gzip tagged [L2] (presumably compression level 2).
   [~shape] is the full array shape and [~chunks] the shape of one chunk. *)
FilesystemStore.Array.create
  ~codecs:[`Transpose [|2; 0; 1|]; `Bytes BE; `Gzip L2]
  ~shape:[|100; 100; 50|]
  ~chunks:[|10; 15; 20|]
  Ndarray.Char 
  '?'
  array_node
  store;;

read/write from/to an array

(* One selector per array dimension. [R [|0; 20|]] presumably selects the range
   0..20 on the first axis, [I 10] the single index 10 on the second, and
   [R [||]] the whole third axis -- confirm the exact range semantics
   (inclusive/exclusive) in the Indexing documentation. *)
let slice = [|R [|0; 20|]; I 10; R [||]|];;
(* Read the selected view as an ndarray of chars. *)
let x = FilesystemStore.Array.read store array_node slice Ndarray.Char;;
(* Do some computation on the array slice *)
(* Every element is replaced by a random char; [Random.int 256] yields 0..255,
   which is exactly the domain accepted by [Char.chr]. *)
let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x;;
(* Write the modified view back to the same region of the array. *)
FilesystemStore.Array.write store array_node slice x';;
(* Read the region again and check that the write round-trips. *)
let y = FilesystemStore.Array.read store array_node slice Ndarray.Char;;
assert (Ndarray.equal x' y);;

create an array with sharding

(* Configuration record for the sharding codec.
   - [chunk_shape]: shape of the inner chunks; each dimension (5, 3, 5) evenly
     divides the chunk shape (10, 15, 20) passed to [Array.create] below.
   - [codecs]: codec chain for the inner chunks. The arguments of [`Zstd] are
     presumably (compression level, checksum flag) -- confirm in the API docs.
   - [index_codecs]: codec chain for the shard index.
   - [index_location]: [Start] presumably places the index at the beginning of
     each shard. *)
let config =
  {chunk_shape = [|5; 3; 5|]
  ;codecs = [`Transpose [|2; 0; 1|]; `Bytes LE; `Zstd (0, true)]
  ;index_codecs = [`Bytes BE; `Crc32c]
  ;index_location = Start};;

(* Second array node, named "another", under the same group. *)
let shard_node = Node.Array.(group_node / "another");;

(* Same shape and chunking as the first array, but the only codec in the chain
   is the sharding codec configured above; the data type is [Complex32] with
   [Complex.zero] as the fill value. *)
FilesystemStore.Array.create
  ~codecs:[`ShardingIndexed config]
  ~shape:[|100; 100; 50|]
  ~chunks:[|10; 15; 20|]
  Ndarray.Complex32
  Complex.zero
  shard_node
  store;;

exploratory functions

(* List every node in the store: [a] holds the array nodes, [g] the group
   nodes (see the printed paths below). *)
let a, g = FilesystemStore.hierarchy store;;
List.map Node.Array.to_path a;;
(*- : string list = ["/some/group/name"; "/some/group/another"] *)
List.map Node.Group.to_path g;;
(*- : string list = ["/"; "/some"; "/some/group"] *)

(* Change the shape of the first array (created as 100 x 100 x 50) to
   25 x 32 x 10. *)
FilesystemStore.Array.reshape store array_node [|25; 32; 10|];;

(* Fetch the metadata of the group and render it as a string. *)
let meta = FilesystemStore.Group.metadata store group_node;;
Metadata.Group.show meta;; (* pretty prints the contents of the metadata *)

(* Existence checks for an array node and a group node. *)
FilesystemStore.Array.exists store shard_node;;
FilesystemStore.Group.exists store group_node;;

(* List the nodes under [group_node] only; this rebinds [a] and [g],
   shadowing the store-wide listing above. *)
let a, g = FilesystemStore.Group.children store group_node;;
List.map Node.Array.to_path a;;
(*- : string list = ["/some/group/name"; "/some/group/another"] *)
List.map Node.Group.to_path g;;
(*- : string list = [] *)

(* Destructive and renaming operations.
   NOTE(review): these read as independent illustrations of the API rather than
   a script to run top to bottom -- after the delete/clear calls the nodes
   passed to the rename calls would presumably no longer exist in the store. *)
FilesystemStore.Group.delete store group_node;;
FilesystemStore.clear store;; (* clears the store *)
FilesystemStore.Group.rename store group_node "new_name";;
(* Uses [array_node], the array node bound in the "create an array" step;
   the previous identifier [anode] was never defined. *)
FilesystemStore.Array.rename store array_node "new_name";;

About

An implementation of the Zarr storage format specification for chunked & compressed multidimensional arrays.

Topics

Resources

License

Stars

Watchers

Forks

Packages

No packages published

Languages