Skip to content

Commit

Permalink
Merge branch 'main' into ci/add_arm64_macOS_wheels
Browse files Browse the repository at this point in the history
  • Loading branch information
psobolewskiPhD authored Jul 30, 2023
2 parents dab1dd5 + 2d7264c commit 21e233a
Show file tree
Hide file tree
Showing 10 changed files with 584 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/wheel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
with:
submodules: true

- uses: pypa/cibuildwheel@v2.12.0
- uses: pypa/cibuildwheel@v2.13.0

- uses: actions/upload-artifact@v3
with:
Expand Down
10 changes: 10 additions & 0 deletions docs/checksum32.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,13 @@ Fletcher32
.. automethod:: encode
.. automethod:: decode

JenkinsLookup3
--------------

.. autoclass:: JenkinsLookup3

.. autoattribute:: codec_id
.. autoattribute:: initval
.. autoattribute:: prefix
.. automethod:: encode
.. automethod:: decode
2 changes: 2 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Enhancements

* Add ``fletcher32`` checksum codec
By :user:`Martin Durant <martindurant>`, :issue:`410`.
* Add ``jenkins_lookup3`` checksum codec
By :user:`Mark Kittisopkul <mkitti>`, :issue:`445`.

Fix
~~~
Expand Down
3 changes: 2 additions & 1 deletion numcodecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@
from numcodecs.msgpacks import MsgPack
register_codec(MsgPack)

from numcodecs.checksum32 import CRC32, Adler32
from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3
register_codec(CRC32)
register_codec(Adler32)
register_codec(JenkinsLookup3)

from numcodecs.json import JSON
register_codec(JSON)
Expand Down
57 changes: 57 additions & 0 deletions numcodecs/checksum32.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@


import numpy as np
import struct


from .abc import Codec
from .compat import ensure_contiguous_ndarray, ndarray_copy
from .jenkins import jenkins_lookup3


class Checksum32(Codec):
Expand Down Expand Up @@ -40,3 +42,58 @@ class Adler32(Checksum32):

codec_id = 'adler32'
checksum = zlib.adler32


class JenkinsLookup3(Checksum32):
    """Bob Jenkins' lookup3 checksum with 32-bit output.

    This is the HDF5 implementation.
    https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.

    Attributes:
        initval: initial seed passed to the hash algorithm, default: 0
        prefix: bytes prepended to the buffer before evaluating the hash,
            default: None
    """

    checksum = jenkins_lookup3
    codec_id = "jenkins_lookup3"

    def __init__(self, initval: int = 0, prefix=None):
        """Store the hash seed and the optional prefix.

        Args:
            initval: seed handed to ``jenkins_lookup3`` on every call.
            prefix: optional bytes-like object; when given it is kept as a
                ``uint8`` array view and hashed ahead of the data.
        """
        self.initval = initval
        if prefix is None:
            self.prefix = None
        else:
            self.prefix = np.frombuffer(prefix, dtype='uint8')

    def _digest(self, data):
        """Return the lookup3 hash of ``prefix`` followed by ``data``."""
        if self.prefix is not None:
            # The prefix contributes to the hash only; it is never stored
            # in the encoded output.
            data = np.hstack((self.prefix, data))
        return jenkins_lookup3(data, self.initval)

    def encode(self, buf):
        """Return buffer plus 4-byte Bob Jenkins' lookup3 checksum.

        The checksum is appended as a little-endian unsigned 32-bit integer.
        """
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        return buf.tobytes() + struct.pack("<I", self._digest(buf))

    def decode(self, buf, out=None):
        """Check Bob Jenkins' lookup3 checksum, and return buffer without it.

        Args:
            buf: encoded bytes, i.e. payload followed by the 4-byte checksum.
            out: optional array that receives the payload bytes.

        Returns:
            ``out`` when it was supplied, otherwise a ``memoryview`` over the
            payload portion of ``buf``.

        Raises:
            ValueError: if ``buf`` is too short to contain the checksum.
            RuntimeError: if the stored and computed checksums differ.
        """
        # Flatten exactly as encode() does so that the slices below always
        # address the trailing bytes of the stream rather than the first axis.
        b = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        if b.size < 4:
            raise ValueError(
                f"Buffer of {b.size} byte(s) is too short to contain a "
                "4-byte checksum."
            )
        payload = b[:-4]
        val = self._digest(payload)
        # Little-endian uint32, mirroring the "<I" packing in encode().
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The Bob Jenkin's lookup3 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        if out is not None:
            out.view("uint8")[:] = payload
            return out
        return memoryview(payload)
2 changes: 1 addition & 1 deletion numcodecs/fletcher32.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class Fletcher32(Codec):
f" match the expected checksum ({found}).\n"
"This could be a sign that the data has been corrupted."
)
if out:
if out is not None:
out.view("uint8")[:] = b[:-4]
return out
return memoryview(b[:-4])
Loading

0 comments on commit 21e233a

Please sign in to comment.