diff --git a/docs/README.md b/docs/README.md
index 35b7f16..bf222fe 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,5 +1,6 @@
 To build docs:
- - create an environment using the ``requirements.txt`` file in this directory
- - run ``make`` in this directory
- - find the docs is "./build/html", probably starting with file "index.html"
+ - `cd docs`
+ - create an environment using the `requirements.txt` file in this directory, e.g., `pip install -r requirements.txt`
+ - run `make html`
+ - open `build/html/index.html`
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 1d5ad92..23d479c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -21,7 +21,8 @@ project = 'kerchunk'
 copyright = '2021, Martin Durant'
 author = 'Martin Durant'
 
-version = kerchunk.__version__
+# No easy way to get the latest version based on how the github pages are built/deployed, so leave it blank or else it will be 9999
+version = ''
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4572bd4..15a2589 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -68,6 +68,7 @@ so that blocks from one or more files can be arranged into aggregate datasets ac
    beyond
    nonzarr
    reference
+   reference_aggregation
    contributing
    advanced
 
diff --git a/docs/source/reference.rst b/docs/source/reference.rst
index ffe7c32..98e4cfb 100644
--- a/docs/source/reference.rst
+++ b/docs/source/reference.rst
@@ -10,6 +10,7 @@ File format backends
     kerchunk.fits.process_file
     kerchunk.tiff.tiff_to_zarr
     kerchunk.netCDF3.NetCDF3ToZarr
+    kerchunk.hdf4.HDF4ToZarr
 
 .. autoclass:: kerchunk.hdf.SingleHdf5ToZarr
     :members: __init__, translate
@@ -24,6 +25,9 @@ File format backends
 .. autoclass:: kerchunk.netCDF3.NetCDF3ToZarr
     :members: __init__, translate
 
+.. autoclass:: kerchunk.hdf4.HDF4ToZarr
+    :members: __init__, translate
+
 Codecs
 ------
 
@@ -50,6 +54,9 @@ Codecs
 .. autoclass:: kerchunk.codecs.RecordArrayMember
     :members: __init__
 
+.. autoclass:: kerchunk.codecs.ZlibCodec
+    :members: __init__
+
 Combining
 ---------
 
diff --git a/docs/source/reference_aggregation.rst b/docs/source/reference_aggregation.rst
new file mode 100644
index 0000000..ee8e28f
--- /dev/null
+++ b/docs/source/reference_aggregation.rst
@@ -0,0 +1,224 @@
+Aggregation special cases
+=============================
+
+As we have already seen in this `page `_, the main purpose of ``kerchunk`` is
+to generate references that expose whole archives of files (GRIB2, NetCDF,
+etc.) for direct access to the data. In this part of the documentation, we
+will look at some other efficient ways of combining references.
+
+GRIB Aggregations
+-----------------
+
+This method of aggregating GRIB file references, developed by `Camus Energy `_,
+works when accompanying ``.idx`` files are present. It involves creating a
+reference index for every GRIB message across the files that we want to
+aggregate.
+
+**The procedure has certain restrictions:**
+
+ - GRIB files must be paired with their ``.idx`` files.
+ - The ``.idx`` files must be plain *text*.
+ - It is specialised for time-series data, where the GRIB files
+   have an *identical* structure.
+ - Each horizon (forecast time) must be indexed separately.
+
+
+Using this method can cut the time required to combine references down to a
+fraction of what it would otherwise take. The original idea was showcased in
+this `talk `_. It follows a three-step approach.
+
+**Three-step approach:**
+
+ 1. Extract and persist metadata directly from a few arbitrary GRIB
+    files for a given product such as HRRR SUBH, GEFS, GFS etc.
+ 2. Use the metadata mapping to build an index table of every GRIB
+    message from the ``.idx`` files.
+ 3. Combine the index data with the metadata to build any FMRC
+    slice (Horizon, RunTime, ValidTime, BestAvailable).
+
+
+*How is it faster?*
+
+The ``.idx`` file, otherwise known as an *index* file, contains the key
+metadata of the messages in a GRIB file: the `index`, `offset`, `datetime`,
+`variable` and `forecast time` of each message. This metadata is used to
+index every GRIB message. By following this approach, we only have to
+``scan_grib`` a single GRIB file, not the whole archive.
+
+Building the index for a time horizon first requires a one-to-one mapping of
+GRIB/Zarr metadata to the attributes in the ``.idx`` file. The only constraint
+is that the mapping must be made from a single GRIB file belonging to the
+*same time horizon*. The indexing process primarily relies on the
+`pandas `_ library; see this `notebook `_ for a demonstration.
+After indexing a single time horizon, you can combine that index with the
+indexes of other time horizons and store the result.
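+
+As a rough sketch, the indexing step can look like the following. It assumes
+the idx helpers ``build_idx_grib_mapping``, ``parse_grib_idx`` and
+``map_from_index`` are importable from ``kerchunk.grib2``; the exact names,
+signatures and the placeholder URLs here are illustrative rather than
+definitive.
+
+.. code-block:: python
+
+    import pandas as pd
+    from kerchunk.grib2 import (
+        build_idx_grib_mapping,  # scan_grib one file and map it to its .idx
+        parse_grib_idx,          # cheap text parse of a single .idx file
+        map_from_index,          # join mapping + idx rows into a k_index
+    )
+
+    # One-off: build the GRIB/Zarr-to-idx mapping from a single file of this horizon.
+    mapping = build_idx_grib_mapping(
+        "s3://bucket/path/one-grib-file-of-this-horizon.grib2",  # placeholder
+        storage_options={"anon": True},
+    )
+
+    # Cheap per-file step: parse each .idx and map it; no further scan_grib needed.
+    pieces = []
+    for url, run_time in files_for_this_horizon:  # hypothetical (url, timestamp) pairs
+        idxdf = parse_grib_idx(url, storage_options={"anon": True})
+        pieces.append(map_from_index(pd.Timestamp(run_time), mapping, idxdf))
+
+    # The combined k_index for this horizon, e.g. persisted to Parquet for later use.
+    k_index = pd.concat(pieces, ignore_index=True)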
+
+.. note::
+   The index in an ``.idx`` file indexes the GRIB messages, whereas the
+   ``k_index`` (kerchunk index) indexes the variables in those messages.
+
+The table below is a *k_index* made from a single GRIB file.
+
+.. list-table:: k_index for a single GRIB file
+   :header-rows: 1
+   :widths: 5 10 15 10 20 15 10 20 20 30 10 10 10
+
+   * -
+     - varname
+     - typeOfLevel
+     - stepType
+     - name
+     - step
+     - level
+     - time
+     - valid_time
+     - uri
+     - offset
+     - length
+     - inline_value
+   * - 0
+     - gh
+     - isobaricInhPa
+     - instant
+     - Geopotential height
+     - 0 days 06:00:00
+     - 0.0
+     - 2017-01-01 06:00:00
+     - 2017-01-01 12:00:00
+     - s3://noaa-gefs-pds/gefs.20170101/06/gec00.t06z...
+     - 0
+     - 47493
+     - None
+   * - 1
+     - t
+     - isobaricInhPa
+     - instant
+     - Temperature
+     - 0 days 06:00:00
+     - 0.0
+     - 2017-01-01 06:00:00
+     - 2017-01-01 12:00:00
+     - s3://noaa-gefs-pds/gefs.20170101/06/gec00.t06z...
+     - 47493
+     - 19438
+     - None
+   * - 2
+     - r
+     - isobaricInhPa
+     - instant
+     - Relative humidity
+     - 0 days 06:00:00
+     - 0.0
+     - 2017-01-01 06:00:00
+     - 2017-01-01 12:00:00
+     - s3://noaa-gefs-pds/gefs.20170101/06/gec00.t06z...
+     - 66931
+     - 10835
+     - None
+   * - 3
+     - u
+     - isobaricInhPa
+     - instant
+     - U component of wind
+     - 0 days 06:00:00
+     - 0.0
+     - 2017-01-01 06:00:00
+     - 2017-01-01 12:00:00
+     - s3://noaa-gefs-pds/gefs.20170101/06/gec00.t06z...
+     - 77766
+     - 22625
+     - None
+   * - 4
+     - v
+     - isobaricInhPa
+     - instant
+     - V component of wind
+     - 0 days 06:00:00
+     - 0.0
+     - 2017-01-01 06:00:00
+     - 2017-01-01 12:00:00
+     - s3://noaa-gefs-pds/gefs.20170101/06/gec00.t06z...
+     - 100391
+     - 20488
+     - None
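+
+Each row of this table carries exactly what a kerchunk reference needs: the
+variable a message belongs to plus the ``uri``/``offset``/``length`` of that
+message. Conceptually (a toy illustration, not the actual aggregation code),
+one row becomes one chunk reference:
+
+.. code-block:: python
+
+    row = k_index.iloc[0]
+    # one k_index row -> one zarr chunk reference of the form [url, offset, length]
+    refs = {f"{row.varname}/0.0.0": [row.uri, int(row.offset), int(row.length)]}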
+
+*What now?*
+
+After creating the k_index for the desired duration, we can use the
+``DataTree`` model from `xarray-datatree `_ to view part of the aggregation
+(the desired variables) or all of it. Below is a tree model made from an
+aggregation of GRIB files produced by the **GEFS** model and hosted in an
+AWS S3 bucket.
+
+.. code-block:: text
+
+    DataTree('None', parent=None)
+    ├── DataTree('prmsl')
+    │   │   Dimensions:  ()
+    │   │   Data variables:
+    │   │       *empty*
+    │   │   Attributes:
+    │   │       name:     Pressure reduced to MSL
+    │   └── DataTree('instant')
+    │       │   Dimensions:  ()
+    │       │   Data variables:
+    │       │       *empty*
+    │       │   Attributes:
+    │       │       stepType:  instant
+    │       └── DataTree('meanSea')
+    │               Dimensions:  (latitude: 181, longitude: 360, time: 1, step: 1,
+    │                             model_horizons: 1, valid_times: 237)
+    │               Coordinates:
+    │                 * latitude    (latitude) float64 1kB 90.0 89.0 88.0 87.0 ... -88.0 -89.0 -90.0
+    │                 * longitude   (longitude) float64 3kB 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0
+    │                   meanSea     float64 8B ...
+    │                   number      (time, step) int64 8B ...
+    │                   step        (model_horizons, valid_times) timedelta64[ns] 2kB ...
+    │                   time        (model_horizons, valid_times) datetime64[ns] 2kB ...
+    │                   valid_time  (model_horizons, valid_times) datetime64[ns] 2kB ...
+    │               Dimensions without coordinates: model_horizons, valid_times
+    │               Data variables:
+    │                   prmsl       (model_horizons, valid_times, latitude, longitude) float64 124MB ...
+    │               Attributes:
+    │                   typeOfLevel:  meanSea
+    └── DataTree('ulwrf')
+        │   Dimensions:  ()
+        │   Data variables:
+        │       *empty*
+        │   Attributes:
+        │       name:     Upward long-wave radiation flux
+        └── DataTree('avg')
+            │   Dimensions:  ()
+            │   Data variables:
+            │       *empty*
+            │   Attributes:
+            │       stepType:  avg
+            └── DataTree('nominalTop')
+                    Dimensions:  (latitude: 181, longitude: 360, time: 1, step: 1,
+                                  model_horizons: 1, valid_times: 237)
+                    Coordinates:
+                      * latitude    (latitude) float64 1kB 90.0 89.0 88.0 87.0 ... -88.0 -89.0 -90.0
+                      * longitude   (longitude) float64 3kB 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0
+                        nominalTop  float64 8B ...
+                        number      (time, step) int64 8B ...
+                        step        (model_horizons, valid_times) timedelta64[ns] 2kB ...
+                        time        (model_horizons, valid_times) datetime64[ns] 2kB ...
+                        valid_time  (model_horizons, valid_times) datetime64[ns] 2kB ...
+                    Dimensions without coordinates: model_horizons, valid_times
+                    Data variables:
+                        ulwrf       (model_horizons, valid_times, latitude, longitude) float64 124MB ...
+                    Attributes:
+                        typeOfLevel:  nominalTop
+
+
+.. tip::
+   For a full tutorial on this workflow, refer to this `kerchunk cookbook `_
+   in `Project Pythia `_.
diff --git a/kerchunk/codecs.py b/kerchunk/codecs.py
index 3d206e7..852076e 100644
--- a/kerchunk/codecs.py
+++ b/kerchunk/codecs.py
@@ -5,6 +5,7 @@ from numcodecs.abc import Codec
 import numpy as np
 import threading
+import zlib
 
 
 class FillStringsCodec(Codec):
@@ -238,3 +239,19 @@ def decode(self, buf, out=None):
 
     def encode(self, buf):
         raise NotImplementedError
+
+
+class ZlibCodec(Codec):
+    codec_id = "zlib"
+
+    def __init__(self):
+        ...
+ + def decode(self, data, out=None): + if out: + out[:] = zlib.decompress(data) + return out + return zlib.decompress(data) + + def encode(self, buf): + return zlib.compress(buf) diff --git a/kerchunk/hdf4.py b/kerchunk/hdf4.py new file mode 100644 index 0000000..449e4e0 --- /dev/null +++ b/kerchunk/hdf4.py @@ -0,0 +1,509 @@ +import fsspec +import numpy as np +import ujson + + +decoders = {} + + +def reg(name): + def f(func): + decoders[name] = func + return func + + return f + + +class HDF4ToZarr: + """Experimental: interface to HDF4 archival files""" + + def __init__( + self, + path, + storage_options=None, + inline_threshold=100, + out=None, + ): + self.path = path + self.st = storage_options + self.thresh = inline_threshold + self.out = out or {} + + def read_int(self, n): + return int.from_bytes(self.f.read(n), "big") + + def read_ddh(self): + return {"ndd": self.read_int(2), "next": self.read_int(4)} + + def read_dd(self): + loc = self.f.tell() + i = int.from_bytes(self.f.read(2), "big") + if i & 0x4000: + extended = True + i = i - 0x4000 + else: + extended = False + tag = tags.get(i, i) + no_data = tag not in {"NULL"} + ref = (tag, int.from_bytes(self.f.read(2), "big")) + info = { + "offset": int.from_bytes(self.f.read(4), "big") * no_data, + "length": int.from_bytes(self.f.read(4), "big") * no_data, + "extended": extended, + "loc": loc, + } + return ref, info + + def decode(self, tag, info): + self.f.seek(info["offset"]) + ident = lambda _, __: info + return decoders.get(tag, ident)(self, info) + + def translate(self, filename=None, storage_options=None): + """Scan and return references + + Parameters + ---------- + filename: if given, write to this as JSON + storage_options: to interpret filename + + Returns + ------- + references + """ + import zarr + from kerchunk.codecs import ZlibCodec + + fo = fsspec.open(self.path, **(self.st or {})) + self.f = fo.open() + + # magic header + assert self.f.read(4) == b"\x0e\x03\x13\x01" + + # all the data descriptors in a linked list + self.tags = {} + while True: + ddh = self.read_ddh() + + for _ in range(ddh["ndd"]): + ident, info = self.read_dd() + self.tags[ident] = info + if ddh["next"] == 0: + # "finished" sentry + break + # or continue + self.f.seek(ddh["next"]) + + # basic decode + for tag, ref in self.tags: + self._dec(tag, ref) + + # global attributes + attrs = {} + for (tag, ref), info in self.tags.items(): + if tag == "VH" and info["names"][0].upper() == "VALUES": + # dtype = dtypes[info["types"][0]] + inf2 = self.tags[("VS", ref)] + self.f.seek(inf2["offset"]) + data = self.f.read(inf2["length"]) + # NASA conventions + if info["name"].startswith(("CoreMetadata.", "ArchiveMetadata.")): + obj = None + for line in data.decode().split("\n"): + if "OBJECT" in line: + obj = line.split()[-1] + if "VALUE" in line: + attrs[obj] = line.split()[-1].lstrip('"').rstrip('"') + + # there should be only one root, and it's probably the last VG + # so maybe this loop isn't needed + roots = set() + children = set() + child = {} + for (tag, ref), info in self.tags.items(): + if tag == "VG": + here = child.setdefault((tag, ref), set()) + for t, r in zip(info["tag"], info["refs"]): + if t == "VG": + children.add((t, r)) + roots.discard((t, r)) + here.add((t, r)) + if tag not in children: + roots.add((tag, ref)) + + # hierarchical output + output = self._descend_vg(*list(roots)[0]) + prot = fo.fs.protocol + prot = prot[0] if isinstance(prot, tuple) else prot + fs = fsspec.filesystem( + "reference", + fo=self.out, + remote_protocol=prot, + 
remote_options=self.st, + ) + g = zarr.open_group("reference://", storage_options=dict(fs=fs)) + refs = {} + for k, v in output.items(): + if isinstance(v, dict): + compression = ZlibCodec() if "refs" in v else None + arr = g.create_dataset( + name=k, + shape=v["dims"], + dtype=v["dtype"], + chunks=v.get("chunks", v["dims"]), + compressor=compression, + overwrite=True, + ) + arr.attrs.update( + dict( + _ARRAY_DIMENSIONS=[f"{k}_x", f"{k}_y"][: len(v["dims"])] + if "refs" in v + else ["0"], + **{ + i: j + for i, j in v.items() + if i not in {"chunk", "dims", "dtype", "refs"} + }, + ) + ) + for r in v.get("refs", []): + refs[f"{k}/{r[0]}"] = [self.path, r[1], r[2]] + else: + attrs[k] = v + fs.references.update(refs) + g.attrs.update(attrs) + + if filename is None: + return fs.references + with fsspec.open(filename, **(storage_options or {})) as f: + ujson.dumps(dict(fs.references), f) + + def _descend_vg(self, tag, ref): + info = self.tags[(tag, ref)] + out = {} + for t, r in zip(info["tag"], info["refs"]): + inf2 = self.tags[(t, r)] + if t == "VG": + tmp = self._descend_vg(t, r) + if list(tmp)[0] == inf2["name"]: + tmp = tmp[inf2["name"]] + out[inf2["name"]] = tmp + elif t == "VH": + if len(inf2["names"]) == 1 and inf2["names"][0].lower() == "values": + dtype = dtypes[inf2["types"][0]] + name = inf2["name"] + inf2 = self.tags[("VS", r)] + self.f.seek(inf2["offset"]) + data = self.f.read(inf2["length"]) + if dtype == "str": + out[name] = data.decode().lstrip('"').rstrip('"') # decode() ? + else: + out[name] = np.frombuffer(data, dtype)[0] + elif t == "NT": + out["dtype"] = inf2["typ"] + elif t == "SD": + out["refs"] = inf2["data"][:-1] + out["chunks"] = [_["chunk_length"] for _ in inf2["data"][-1]] + elif t == "SDD": + out["dims"] = inf2["dims"] + else: + # NDGs contain same info as NT, SD and SDD + pass + return out + + def _dec(self, tag, ref): + info = self.tags[(tag, ref)] + if not set(info) - {"length", "offset", "extended", "loc"}: + self.f.seek(info["offset"]) + if info["extended"]: + info["data"] = self._dec_extended() + else: + info.update(self.decode(tag, info)) + return info + + def _dec_extended(self): + ext_type = spec[self.read_int(2)] + if ext_type == "CHUNKED": + return self._dec_chunked() + elif ext_type == "LINKED": + return self._dec_linked_header() + elif ext_type == "COMP": + return self._dec_comp() + + def _dec_linked_header(self): + # get the bytes of a linked set - these will always be inlined + self.read_int(4) # length + self.read_int(4) # blk_len + self.read_int(4) # num_blk + next_ref = self.read_int(2) + out = [] + while next_ref: + next_ref, data = self._dec_linked_block(self.tags[("LINKED", next_ref)]) + out.extend([d for d in data if d]) + bits = [] + for ref in out: + info = self.tags[("LINKED", ref)] + self.f.seek(info["offset"]) + bits.append(self.f.read(info["length"])) + return b"".join(bits) + + def _dec_linked_block(self, block): + self.f.seek(block["offset"]) + next_ref = self.read_int(2) + refs = [self.read_int(2) for _ in range((block["length"] // 2) - 1)] + return next_ref, refs + + def _dec_chunked(self): + # we want to turn the chunks table into references + # tag_head_len = self.read_int(4) + # version = self.f.read(1)[0] + # flag = self.read_int(4) + # elem_tot_len = self.read_int(4) + # chunk_size = self.read_int(4) + # nt_size = self.read_int(4) + self.f.seek(21, 1) + chk_tbl_tag = tags[self.read_int(2)] # should be VH + chk_tbl_ref = self.read_int(2) + self.read_int(2) # sp_tab = tags[self.read_int(2)] + self.read_int(2) # sp_ref + ndims = 
self.read_int(4) + + dims = [ # we don't use these, could skip + { + "flag": self.read_int(4), + "dim_length": self.read_int(4), + "chunk_length": self.read_int(4), + } + for _ in range(ndims) + ] + self.f.read( # fill_value + self.read_int(4) + ) # to be interpreted as a number later; but chunk table probs has no fill + # self.f.seek(12*ndims + 4, 1) # if skipping + + header = self._dec(chk_tbl_tag, chk_tbl_ref) + data = self._dec("VS", chk_tbl_ref)["data"] # corresponding table + + # header gives the field pattern for the rows of data, one per chunk + # maybe faster to use struct and iter than numpy, since we iterate anyway + dt = [(f"ind{i}", ">u4") for i in range(ndims)] + [ + ("tag", ">u2"), + ("ref", ">u2"), + ] + rows = np.frombuffer(data, dtype=dt, count=header["nvert"]) + # rows["tag"] should always be 61 -> CHUNK + refs = [] + for *ind, tag, ref in rows: + # maybe ind needs reversing since everything is FORTRAN + chunk_tag = self.tags[("CHUNK", ref)] + if chunk_tag["extended"]: + self.f.seek(chunk_tag["offset"]) + # these are always COMP? + ctype, offset, length = self._dec_extended() + refs.append([".".join(str(_) for _ in ind), offset, length, ctype]) + else: + refs.append( + [ + ".".join(str(_) for _ in ind), + chunk_tag["offset"], + chunk_tag["length"], + ] + ) + refs.append(dims) + return refs + + def _dec_comp(self): + # version = self.read_int(2) # always 0 + # len_uncomp = self.read_int(4) + self.f.seek(6, 1) + + data_ref = self.read_int(2) + # model = self.read_int(2) # always 0 + ctype = "DEFLATE" # comp[self.read_int(2)] + tag = self.tags[("COMPRESSED", data_ref)] + return ctype, tag["offset"], tag["length"] + + +@reg("NDG") +def _dec_ndg(self, info): + # links together these things as a Data Group + return { + "tags": [ + (tags[self.read_int(2)], self.read_int(2)) + for _ in range(0, info["length"], 4) + ] + } + + +@reg("SDD") +def _dec_sdd(self, info): + rank = self.read_int(2) + dims = [self.read_int(4) for _ in range(rank)] + data_tag = (tags[self.read_int(2)], self.read_int(2)) + scale_tags = [(tags[self.read_int(2)], self.read_int(2)) for _ in range(rank)] + return _pl(locals()) + + +@reg("VERSION") +def _dec_version(self, info): + return { + "major": self.read_int(4), + "minor": self.read_int(4), + "release": self.read_int(4), + "string:": _null_str(self.f.read(info["length"] - 10).decode()), + } + + +@reg("VH") +def _dec_vh(self, info): + # virtual group ("table") header + interface = self.read_int(2) + nvert = self.read_int(4) + ivsize = self.read_int(2) + nfields = self.read_int(2) + types = [self.read_int(2) for _ in range(nfields)] + isize = [self.read_int(2) for _ in range(nfields)] + offsets = [self.read_int(2) for _ in range(nfields)] + order = [self.read_int(2) for _ in range(nfields)] + names = [self.f.read(self.read_int(2)).decode() for _ in range(nfields)] + namelen = self.read_int(2) + name = self.f.read(namelen).decode() + classlen = self.read_int(2) + cls = self.f.read(classlen).decode() + ref = (self.read_int(2), self.read_int(2)) + return _pl(locals()) + + +@reg("VG") +def _dec_vg(self, info): + nelt = self.read_int(2) + tag = [tags[self.read_int(2)] for _ in range(nelt)] + refs = [self.read_int(2) for _ in range(nelt)] + name = self.f.read(self.read_int(2)).decode() + cls = self.f.read(self.read_int(2)).decode() + return _pl(locals()) + + +@reg("NT") +def _dec_nt(self, info): + version, typ, width, cls = list(self.f.read(4)) + typ = dtypes[typ] + return _pl(locals()) + + +def _null_str(s): + return s.split("\00", 1)[0] + + +def _pl(l): + return 
{k: v for k, v in l.items() if k not in {"info", "f", "self"}} + + +# hdf/src/htags.h +tags = { + 1: "NULL", + 20: "LINKED", + 30: "VERSION", + 40: "COMPRESSED", + 50: "VLINKED", + 51: "VLINKED_DATA", + 60: "CHUNKED", + 61: "CHUNK", + 100: "FID", + 101: "FD", + 102: "TID", + 103: "TD", + 104: "DIL", + 105: "DIA", + 106: "NT", + 107: "MT", + 108: "FREE", + 200: "ID8", + 201: "IP8", + 202: "RI8", + 203: "CI8", + 204: "II8", + 300: "ID", + 301: "LUT", + 302: "RI", + 303: "CI", + 304: "NRI", + 306: "RIG", + 307: "LD", + 308: "MD", + 309: "MA", + 310: "CCN", + 311: "CFM", + 312: "AR", + 400: "DRAW", + 401: "RUN", + 500: "XYP", + 501: "MTO", + 602: "T14", + 603: "T105", + 700: "SDG", + 701: "SDD", + 702: "SD", + 703: "SDS", + 704: "SDL", + 705: "SDU", + 706: "SDF", + 707: "SDM", + 708: "SDC", + 709: "SDT", + 710: "SDLNK", + 720: "NDG", + 721: "RESERVED", + # "Objects of tag 721 are never actually written to the file. The tag is + # needed to make things easier mixing DFSD and SD style objects in the same file" + 731: "CAL", + 732: "FV", + 799: "BREQ", + 781: "SDRAG", + 780: "EREQ", + 1965: "VG", + 1962: "VH", + 1963: "VS", + 11: "RLE", + 12: "IMCOMP", + 13: "JPEG", + 14: "GREYJPEG", + 15: "JPEG5", + 16: "GREYJPEG5", +} +spec = { + 1: "LINKED", + 2: "EXT", + 3: "COMP", + 4: "VLINKED", + 5: "CHUNKED", + 6: "BUFFERED", + 7: "COMPRAS", +} + +# hdf4/hdf/src/hntdefs.h +dtypes = { + 5: "f4", + 6: "f8", + 20: "i1", + 21: "u1", + 4: "str", # special case, size given in header + 22: ">i2", + 23: ">u2", + 24: ">i4", + 25: ">u4", + 26: ">i8", + 27: ">u8", +} + +# hdf4/hdf/src/hcomp.h +comp = { + 0: "NONE", + 1: "RLE", + 2: "NBIT", + 3: "SKPHUFF", + 4: "DEFLATE", # called deflate, but code says "gzip" and doc says "GNU zip"; actually zlib? + # see codecs.ZlibCodec + 5: "SZIP", + 7: "JPEG", +} diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index a20be2a..69fd22b 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -279,18 +279,18 @@ def test_compound_string_encode(): assert (z.vlen_str["strs"][1:] == "").all() -def test_compact(): - pytest.importorskip("ipfsspec") - h = kerchunk.hdf.SingleHdf5ToZarr( - "ipfs://QmVZc4TzRP7zydgKzDX7CH2JpYw2LJKkWBm6jhCfigeon6" - ) - out = h.translate() - - m = fsspec.get_mapper("reference://", fo=out) - g = zarr.open(m) - assert np.allclose(g.ancillary_data.atlas_sdp_gps_epoch[:], 1.19880002e09) - - +# def test_compact(): +# pytest.importorskip("ipfsspec") +# h = kerchunk.hdf.SingleHdf5ToZarr( +# "ipfs://QmVZc4TzRP7zydgKzDX7CH2JpYw2LJKkWBm6jhCfigeon6" +# ) +# out = h.translate() +# +# m = fsspec.get_mapper("reference://", fo=out) +# g = zarr.open(m) +# assert np.allclose(g.ancillary_data.atlas_sdp_gps_epoch[:], 1.19880002e09) +# +# def test_compress(): import glob diff --git a/pyproject.toml b/pyproject.toml index c11e340..5eb7c0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ fill_hdf_strings = "kerchunk.codecs:FillStringsCodec" FITSAscii = "kerchunk.codecs:AsciiTableCodec" FITSVarBintable = "kerchunk.codecs:VarArrCodec" record_member = "kerchunk.codecs:RecordArrayMember" +zlib = "kerchunk.codecs:ZlibCodec" [project.entry-points."xarray.backends"] kerchunk = "kerchunk.xarray_backend:KerchunkBackend"
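
Usage sketch for the new ``kerchunk.hdf4.HDF4ToZarr`` backend (an illustrative
note, not part of the patch; the input path and S3 options are placeholders):

    import fsspec
    import xarray as xr
    from kerchunk.hdf4 import HDF4ToZarr

    # scan one HDF4 file and build its reference set
    refs = HDF4ToZarr(
        "s3://bucket/path/archive-file.hdf", storage_options={"anon": True}
    ).translate()

    # open the references through fsspec's reference filesystem; zlib-compressed
    # chunks are handled by the ZlibCodec registered via the numcodecs entry point
    fs = fsspec.filesystem(
        "reference", fo=refs, remote_protocol="s3", remote_options={"anon": True}
    )
    ds = xr.open_dataset(
        fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False}
    )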