From 02b2fe309963e201992c424490c1b7b6f705af3d Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Thu, 13 Jul 2023 06:05:50 -0400 Subject: [PATCH 1/8] Add initial version of Cython jenkins lookup3 32-bit checksum --- numcodecs/__init__.py | 3 + numcodecs/jenkins.pyx | 374 ++++++++++++++++++++++++++++++++ numcodecs/tests/test_jenkins.py | 51 +++++ setup.py | 25 ++- 4 files changed, 452 insertions(+), 1 deletion(-) create mode 100644 numcodecs/jenkins.pyx create mode 100644 numcodecs/tests/test_jenkins.py diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index 1e3c8536..fc623a69 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -114,3 +114,6 @@ from numcodecs.fletcher32 import Fletcher32 register_codec(Fletcher32) + +from numcodecs.jenkins import JenkinsLookup3 +register_codec(JenkinsLookup3) diff --git a/numcodecs/jenkins.pyx b/numcodecs/jenkins.pyx new file mode 100644 index 00000000..09730379 --- /dev/null +++ b/numcodecs/jenkins.pyx @@ -0,0 +1,374 @@ +# cython: language_level=3 +# cython: overflowcheck=False +# cython: cdivision=True + +""" +Cython implementation of Bob Jenkin's hashlittle from lookup3.c. +This code was adapted from HDF5 by Mark Kittisopikul. +""" + +""" +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. 
+You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +""" + +""" +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 2006 by The HDF Group. + +NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 1998-2006 by The Board of Trustees of the University of Illinois. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial purposes) +provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or materials provided with the distribution. + +3. Neither the name of The HDF Group, the name of the University, nor the + name of any Contributor may be used to endorse or promote products derived + from this software without specific prior written permission from + The HDF Group, the University, or the Contributor, respectively. 
+ +DISCLAIMER: +THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS +"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. IN NO +EVENT SHALL THE HDF GROUP OR THE CONTRIBUTORS BE LIABLE FOR ANY DAMAGES +SUFFERED BY THE USERS ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You are under no obligation whatsoever to provide any bug fixes, patches, or +upgrades to the features, functionality or performance of the source code +("Enhancements") to anyone; however, if you choose to make your Enhancements +available either publicly, or directly to The HDF Group, without imposing a +separate written license agreement for such Enhancements, then you hereby +grant the following license: a non-exclusive, royalty-free perpetual license +to install, use, modify, prepare derivative works, incorporate into other +computer software, distribute, and sublicense such enhancements or derivative +works thereof, in binary and source code form. +""" + +import struct +import cython + +from numcodecs.checksum32 import Checksum32 +from numcodecs.compat import ensure_contiguous_ndarray + +from libc.stdint cimport uint8_t, uint16_t, uint32_t + +def jenkins_lookup3(data: bytes, initval: uint32_t=0): + """ + jenkins_lookup3(data: bytes, initval: uint32_t=0) + hash a variable-length key into a 32-bit value + + data : the key (unaligned variable-length array of bytes) + initval : can be any 4-byte value, defaults to 0 + + Returns a 32-bit value. Every bit of the key affects every bit of + the return value. Two keys differing by one or two bits will have + totally different hash values. + + The best hash table sizes are powers of 2. There is no need to do + mod a prime (mod is sooo slow!). If you need less than 32 bits, + use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)) + In which case, the hash table should have hashsize(10) elements. 
+ + If you are hashing strings, do it like this: + ``` + h = 0 + for k in strings: + h = jenkins_lookup3(k, h) + ``` + + By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + code any way you wish, private, educational, or commercial. It's free. + + Use for hash table lookup, or anything where one collision in 2^^32 is + acceptable. Do NOT use for cryptographic purposes. + """ + return _jenkins_lookup3(data, initval) + +cdef uint32_t _jenkins_lookup3(const uint8_t[::1] _data, uint32_t initval=0): + """ + Implementation of jenkins_lookup3 + + Converted from H5_checksum_lookup3 + https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472 + + Originally hashlittle from https://www.burtleburtle.net/bob/c/lookup3.c + Alternatively, consider the hashword implementation if we can assume little endian and alignment. + """ + + cdef: + size_t length = _data.shape[0] + # internal state + uint32_t a, b, c = 0 + + # Set up the internal state + a = b = c = (0xdeadbeef) + (length) + initval + + # Return immediately for empty bytes + if length == 0: + return c + + cdef: + const uint8_t *k = &_data[0] + + # We are adding uint32_t values (words) byte by byte so we do not assume endianness or alignment + # lookup3.c hashlittle checks for alignment + + # all but the last block: affect some 32 bits of (a,b,c) + while length > 12: + a += k[0] + a += (k[1]) << 8 + a += (k[2]) << 16 + a += (k[3]) << 24 + b += k[4] + b += (k[5]) << 8 + b += (k[6]) << 16 + b += (k[7]) << 24 + c += k[8] + c += (k[9]) << 8 + c += (k[10]) << 16 + c += (k[11]) << 24 + a, b, c = _jenkins_lookup3_mix(a, b, c) + length -= 12 + k += 12 + + # -------------------------------- last block: affect all 32 bits of (c) + if length == 12: + c += (k[11]) << 24 + length -= 1 + + if length == 11: + c += (k[10]) << 16 + length -= 1 + + if length == 10: + c += (k[9]) << 8 + length -= 1 + + if length == 9: + c += k[8] + length -= 1 + + if length == 8: + b += (k[7]) 
<< 24 + length -= 1 + + if length == 7: + b += (k[6]) << 16 + length -= 1 + + if length == 6: + b += (k[5]) << 8 + length -= 1 + + if length == 5: + b += k[4] + length -= 1 + + if length == 4: + a += (k[3]) << 24 + length -= 1 + + if length == 3: + a += (k[2]) << 16 + length -= 1 + + if length == 2: + a += (k[1]) << 8 + length -= 1 + + if length == 1: + a += k[0] + length -= 1 + + if length == 0: + pass + + return _jenkins_lookup3_final(a, b, c) + +cdef inline uint32_t _jenkins_lookup3_final(uint32_t a, uint32_t b, uint32_t c): + """ + _jenkins_lookup3_final -- final mixing of 3 32-bit values (a,b,c) into c + + Pairs of (a,b,c) values differing in only a few bits will usually + produce values of c that look totally different. This was tested for + * pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). + * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. + * the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + + These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 + and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 + """ + c ^= b + c -= _jenkins_lookup3_rot(b,14) + a ^= c + a -= _jenkins_lookup3_rot(c,11) + b ^= a + b -= _jenkins_lookup3_rot(a,25) + c ^= b + c -= _jenkins_lookup3_rot(b,16) + a ^= c + a -= _jenkins_lookup3_rot(c,4) + b ^= a + b -= _jenkins_lookup3_rot(a,14) + c ^= b + c -= _jenkins_lookup3_rot(b,24) + return c + +cdef inline uint32_t _jenkins_lookup3_rot(uint32_t x, uint8_t k): + return (((x) << (k)) ^ ((x) >> (32 - (k)))) + +cdef inline (uint32_t, uint32_t, uint32_t) _jenkins_lookup3_mix(uint32_t a, uint32_t b, uint32_t c): + """ + _jenkins_lookup3_mix -- mix 3 32-bit values reversibly. 
+ + This is reversible, so any information in (a,b,c) before mix() is + still in (a,b,c) after mix(). + + If four pairs of (a,b,c) inputs are run through mix(), or through + mix() in reverse, there are at least 32 bits of the output that + are sometimes the same for one pair and different for another pair. + This was tested for: + * pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). + * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. + * the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + + Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 + Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing + for "differ" defined as + with a one-bit base and a two-bit delta. I + used http://burtleburtle.net/bob/hash/avalanche.html to choose + the operations, constants, and arrangements of the variables. + + This does not achieve avalanche. There are input bits of (a,b,c) + that fail to affect some output bits of (a,b,c), especially of a. The + most thoroughly mixed value is c, but it doesn't really even achieve + avalanche in c. + + This allows some parallelism. Read-after-writes are good at doubling + the number of bits affected, so the goal of mixing pulls in the opposite + direction as the goal of parallelism. I did what I could. Rotates + seem to cost as much as shifts on every machine I could lay my hands + on, and rotates are much kinder to the top and bottom bits, so I used + rotates. 
+ """ + a -= c + a ^= _jenkins_lookup3_rot(c, 4) + c += b + b -= a + b ^= _jenkins_lookup3_rot(a, 6) + a += c + c -= b + c ^= _jenkins_lookup3_rot(b, 8) + b += a + a -= c + a ^= _jenkins_lookup3_rot(c, 16) + c += b + b -= a + b ^= _jenkins_lookup3_rot(a, 19) + a += c + c -= b + c ^= _jenkins_lookup3_rot(b, 4) + b += a + return a, b, c + +class JenkinsLookup3(Checksum32): + """Bob Jenkin's lookup3 checksum with 32-bit output + + This is the HDF5 implementation. + https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent. + """ + + checksum = jenkins_lookup3 + codec_id = "bob_jenkins_lookup3" + + def encode(self, buf, initval=0): + """Return buffer plus 4-byte Bob Jenkin's lookup3 checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + cdef const uint8_t[::1] b_ptr = buf + cdef uint32_t _initval = initval + val = _jenkins_lookup3(b_ptr, _initval) + return buf.tobytes() + struct.pack(" Date: Thu, 13 Jul 2023 06:19:29 -0400 Subject: [PATCH 2/8] Add release notes and docs --- docs/checksum32.rst | 8 ++++++++ docs/release.rst | 2 ++ 2 files changed, 10 insertions(+) diff --git a/docs/checksum32.rst b/docs/checksum32.rst index 5e682afc..ac954d32 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -33,3 +33,11 @@ Fletcher32 .. automethod:: encode .. automethod:: decode +JenkinsLookup3 +-------------- + +.. autoclass:: numcodecs.jenkins.JenkinsLookup3 + + .. autoattribute:: codec_id + .. automethod:: encode + .. automethod:: decode diff --git a/docs/release.rst b/docs/release.rst index 3f2394b5..c9033a85 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -17,6 +17,8 @@ Enhancements * Add ``fletcher32`` checksum codec By :user:`Martin Durant `, :issue:`410`. 
+* Add ``jenkins_lookup3`` checksum codec + By :user:`Mark Kittisopikul `, :issue:`445`. Fix ~~~ From 296e4eaf809a9d73c0ee3b295a99d64ea023cb79 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 14 Jul 2023 01:32:06 -0400 Subject: [PATCH 3/8] Respond to comments, flake8, add prefix --- numcodecs/__init__.py | 6 +-- numcodecs/checksum32.py | 53 +++++++++++++++++++ numcodecs/jenkins.pyx | 51 +----------------- numcodecs/tests/test_jenkins.py | 92 ++++++++++++++++++++++++++------- setup.py | 2 + 5 files changed, 131 insertions(+), 73 deletions(-) diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index fc623a69..3d7befe2 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -98,9 +98,10 @@ from numcodecs.msgpacks import MsgPack register_codec(MsgPack) -from numcodecs.checksum32 import CRC32, Adler32 +from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3 register_codec(CRC32) register_codec(Adler32) +register_codec(JenkinsLookup3) from numcodecs.json import JSON register_codec(JSON) @@ -114,6 +115,3 @@ from numcodecs.fletcher32 import Fletcher32 register_codec(Fletcher32) - -from numcodecs.jenkins import JenkinsLookup3 -register_codec(JenkinsLookup3) diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index 06dfbdb4..c26d1797 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -2,10 +2,12 @@ import numpy as np +import struct from .abc import Codec from .compat import ensure_contiguous_ndarray, ndarray_copy +from .jenkins import jenkins_lookup3 class Checksum32(Codec): @@ -40,3 +42,54 @@ class Adler32(Checksum32): codec_id = 'adler32' checksum = zlib.adler32 + + +class JenkinsLookup3(Checksum32): + """Bob Jenkin's lookup3 checksum with 32-bit output + + This is the HDF5 implementation. + https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. 
At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent. + """ + + checksum = jenkins_lookup3 + codec_id = "jenkins_lookup3" + + def __init__(self, initval=0, prefix=None): + self.initval = initval + if prefix is None: + self.prefix = None + else: + self.prefix = np.frombuffer(prefix, dtype='uint8') + + def encode(self, buf): + """Return buffer plus 4-byte Bob Jenkin's lookup3 checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + if self.prefix is None: + val = jenkins_lookup3(buf, self.initval) + else: + val = jenkins_lookup3(np.hstack((self.prefix, buf)), self.initval) + return buf.tobytes() + struct.pack(" Date: Fri, 14 Jul 2023 01:57:39 -0400 Subject: [PATCH 4/8] Improve tests and documentation --- numcodecs/checksum32.py | 6 ++- numcodecs/tests/test_jenkins.py | 72 ++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index c26d1797..4d8f64bf 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -54,12 +54,16 @@ class JenkinsLookup3(Checksum32): bytes when encoded. At decode time, the checksum is performed on the data portion and compared with the four-byte checksum, raising RuntimeError if inconsistent. 
+ + Attributes: + initval: initial seed passed to the hash algorithm, default: 0 + prefix: bytes prepended to the buffer before evaluating the hash, default: None """ checksum = jenkins_lookup3 codec_id = "jenkins_lookup3" - def __init__(self, initval=0, prefix=None): + def __init__(self, initval: int = 0, prefix=None): self.initval = initval if prefix is None: self.prefix = None diff --git a/numcodecs/tests/test_jenkins.py b/numcodecs/tests/test_jenkins.py index 3619b209..16f2fbc6 100644 --- a/numcodecs/tests/test_jenkins.py +++ b/numcodecs/tests/test_jenkins.py @@ -18,33 +18,43 @@ def test_jenkins_lookup3(): h = jenkins_lookup3(b"jenkins", 0) assert h == 202276345 - h_last = 0 - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last - h_last = h - h = jenkins_lookup3(b"", h_last) - assert h != h_last + h_last = [0] + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = 
jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) a = np.frombuffer(b"Four score and seven years ago", dtype="uint8") h = jenkins_lookup3(a, 0) @@ -58,11 +68,19 @@ def test_jenkins_lookup3_codec(): assert result[-4:] == b'\x51\x05\x77\x17' assert bytes(j.decode(result)) == s + j = JenkinsLookup3(initval=0xdeadbeef) + result = j.encode(s) + assert bytes(j.decode(result)) == s + j = JenkinsLookup3(initval=1230) result = j.encode(s) assert result[-4:] == b'\xd7Z\xe2\x0e' assert bytes(j.decode(result)) == s + j = JenkinsLookup3(initval=1230, prefix=b"Hello world") + result = j.encode(s) + assert bytes(j.decode(result)) == s + chunk_index = b"\x00\x08\x00\x00\x00\x00\x00\x00" + \ b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ b"\xf7\x17\x00\x00\x00\x00\x00\x00" + \ From 33c18bcfc80ac45afe0ea69ec5d2312090debef8 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Sat, 15 Jul 2023 06:03:12 -0400 Subject: [PATCH 5/8] Improve test coverage --- docs/Makefile | 63 ++++++++++++++++-------------- docs/checksum32.rst | 4 +- numcodecs/checksum32.py | 2 +- numcodecs/fletcher32.pyx | 2 +- numcodecs/tests/test_fletcher32.py | 7 ++++ numcodecs/tests/test_jenkins.py | 28 +++++++++++++ 6 files changed, 73 insertions(+), 33 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index fe6a0bc4..19bcf031 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,10 +7,12 @@ SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) -endif +.PHONY: sphinx +sphinx: + # User-friendly check for sphinx-build + ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) + $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) + endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 @@ -54,44 +56,44 @@ clean: rm -rf $(BUILDDIR)/* .PHONY: html -html: +html: sphinx $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: dirhtml -dirhtml: +dirhtml: sphinx $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml -singlehtml: +singlehtml: sphinx $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." .PHONY: pickle -pickle: +pickle: sphinx $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json -json: +json: sphinx $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp -htmlhelp: +htmlhelp: sphinx $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
.PHONY: qthelp -qthelp: +qthelp: sphinx $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ @@ -101,7 +103,7 @@ qthelp: @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zarr.qhc" .PHONY: applehelp -applehelp: +applehelp: sphinx $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @@ -110,7 +112,7 @@ applehelp: "bundle." .PHONY: devhelp -devhelp: +devhelp: sphinx $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @@ -120,19 +122,19 @@ devhelp: @echo "# devhelp" .PHONY: epub -epub: +epub: sphinx $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." .PHONY: epub3 -epub3: +epub3: sphinx $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 @echo @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." .PHONY: latex -latex: +latex: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @@ -140,33 +142,33 @@ latex: "(use \`make latexpdf' here to do that automatically)." .PHONY: latexpdf -latexpdf: +latexpdf: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: latexpdfja -latexpdfja: +latexpdfja: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: text -text: +text: sphinx $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." 
.PHONY: man -man: +man: sphinx $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." .PHONY: texinfo -texinfo: +texinfo: sphinx $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @@ -174,57 +176,58 @@ texinfo: "(use \`make info' here to do that automatically)." .PHONY: info -info: +info: sphinx $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." .PHONY: gettext -gettext: +gettext: sphinx $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." .PHONY: changes -changes: +changes: sphinx $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck -linkcheck: +linkcheck: sphinx $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest -doctest: +doctest: sphinx $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage -coverage: +coverage: sphinx $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml -xml: +xml: sphinx $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml -pseudoxml: +pseudoxml: sphinx $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy -dummy: +dummy: sphinx $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." + diff --git a/docs/checksum32.rst b/docs/checksum32.rst index ac954d32..5b2013f8 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -36,8 +36,10 @@ Fletcher32 JenkinsLookup3 -------------- -.. autoclass:: numcodecs.jenkins.JenkinsLookup3 +.. autoclass:: JenkinsLookup3 .. autoattribute:: codec_id + .. autoattribute:: initval + .. autoattribute:: prefix .. automethod:: encode .. automethod:: decode diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index 4d8f64bf..35a5ab99 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -93,7 +93,7 @@ def decode(self, buf, out=None): f" match the expected checksum ({found}).\n" "This could be a sign that the data has been corrupted." ) - if out: + if out is not None: out.view("uint8")[:] = b[:-4] return out return memoryview(b[:-4]) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 02f9319c..7c7b159f 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -79,7 +79,7 @@ class Fletcher32(Codec): f" match the expected checksum ({found}).\n" "This could be a sign that the data has been corrupted." 
) - if out: + if out is not None: out.view("uint8")[:] = b[:-4] return out return memoryview(b[:-4]) diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py index 76564e95..aa4ca1ab 100644 --- a/numcodecs/tests/test_fletcher32.py +++ b/numcodecs/tests/test_fletcher32.py @@ -40,3 +40,10 @@ def test_known(): 1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118, ] assert outarr.tolist() == expected + + +def test_out(): + data = np.frombuffer(bytearray(b"Hello World"), dtype="uint8") + f = Fletcher32() + result = f.encode(data) + f.decode(result, out=data) diff --git a/numcodecs/tests/test_jenkins.py b/numcodecs/tests/test_jenkins.py index 16f2fbc6..59ce8f50 100644 --- a/numcodecs/tests/test_jenkins.py +++ b/numcodecs/tests/test_jenkins.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from numcodecs.jenkins import jenkins_lookup3 from numcodecs.checksum32 import JenkinsLookup3 @@ -119,3 +120,30 @@ def test_jenkins_lookup3_codec(): result = j.encode(chunk_index[:-4]) j.decode(result) assert result == chunk_index + +@pytest.mark.parametrize( + "dtype", + ["uint8", "int32", "float32"] +) +def test_with_data(dtype): + data = np.arange(100, dtype=dtype) + j = JenkinsLookup3() + arr = np.frombuffer(j.decode(j.encode(data)), dtype=dtype) + assert (arr == data).all() + + +def test_error(): + data = np.arange(100) + j = JenkinsLookup3() + enc = j.encode(data) + enc2 = bytearray(enc) + enc2[0] += 1 + with pytest.raises(RuntimeError) as e: + j.decode(enc2) + assert "Bob Jenkin's lookup3 checksum" in str(e.value) + +def test_out(): + data = np.frombuffer(bytearray(b"Hello World"), dtype="uint8") + j = JenkinsLookup3() + result = j.encode(data) + j.decode(result, out=data) From 7484d43d8691d816d92e103557cdd47b6a6fc896 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Sat, 15 Jul 2023 12:30:35 -0400 Subject: [PATCH 6/8] Fix flake8 issues --- numcodecs/tests/test_jenkins.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/numcodecs/tests/test_jenkins.py b/numcodecs/tests/test_jenkins.py index 59ce8f50..4873e44f 100644 --- a/numcodecs/tests/test_jenkins.py +++ b/numcodecs/tests/test_jenkins.py @@ -121,6 +121,7 @@ def test_jenkins_lookup3_codec(): j.decode(result) assert result == chunk_index + @pytest.mark.parametrize( "dtype", ["uint8", "int32", "float32"] @@ -142,6 +143,7 @@ def test_error(): j.decode(enc2) assert "Bob Jenkin's lookup3 checksum" in str(e.value) + def test_out(): data = np.frombuffer(bytearray(b"Hello World"), dtype="uint8") j = JenkinsLookup3() From 28749bba3ce6b6219e54c4a44607c656cb01c5b2 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 18 Jul 2023 14:36:22 -0400 Subject: [PATCH 7/8] Remove fletcher32 modifications --- numcodecs/fletcher32.pyx | 2 +- numcodecs/tests/test_fletcher32.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 7c7b159f..02f9319c 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -79,7 +79,7 @@ class Fletcher32(Codec): f" match the expected checksum ({found}).\n" "This could be a sign that the data has been corrupted." 
) - if out is not None: + if out: out.view("uint8")[:] = b[:-4] return out return memoryview(b[:-4]) diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py index aa4ca1ab..76564e95 100644 --- a/numcodecs/tests/test_fletcher32.py +++ b/numcodecs/tests/test_fletcher32.py @@ -40,10 +40,3 @@ def test_known(): 1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118, ] assert outarr.tolist() == expected - - -def test_out(): - data = np.frombuffer(bytearray(b"Hello World"), dtype="uint8") - f = Fletcher32() - result = f.encode(data) - f.decode(result, out=data) From f579b0079b604e974281e4ddf07e02d7e06f9c3a Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 18 Jul 2023 14:48:05 -0400 Subject: [PATCH 8/8] Remove docs/Makefile modifications --- docs/Makefile | 63 ++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 19bcf031..fe6a0bc4 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,12 +7,10 @@ SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build -.PHONY: sphinx -sphinx: - # User-friendly check for sphinx-build - ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) - endif +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) + $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) +endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 @@ -56,44 +54,44 @@ clean: rm -rf $(BUILDDIR)/* .PHONY: html -html: sphinx +html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: dirhtml -dirhtml: sphinx +dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml -singlehtml: sphinx +singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." .PHONY: pickle -pickle: sphinx +pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json -json: sphinx +json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp -htmlhelp: sphinx +htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." .PHONY: qthelp -qthelp: sphinx +qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ @@ -103,7 +101,7 @@ qthelp: sphinx @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zarr.qhc" .PHONY: applehelp -applehelp: sphinx +applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @@ -112,7 +110,7 @@ applehelp: sphinx "bundle." .PHONY: devhelp -devhelp: sphinx +devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." 
@@ -122,19 +120,19 @@ devhelp: sphinx @echo "# devhelp" .PHONY: epub -epub: sphinx +epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." .PHONY: epub3 -epub3: sphinx +epub3: $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 @echo @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." .PHONY: latex -latex: sphinx +latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @@ -142,33 +140,33 @@ latex: sphinx "(use \`make latexpdf' here to do that automatically)." .PHONY: latexpdf -latexpdf: sphinx +latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: latexpdfja -latexpdfja: sphinx +latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: text -text: sphinx +text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." .PHONY: man -man: sphinx +man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." .PHONY: texinfo -texinfo: sphinx +texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @@ -176,58 +174,57 @@ texinfo: sphinx "(use \`make info' here to do that automatically)." .PHONY: info -info: sphinx +info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." 
make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." .PHONY: gettext -gettext: sphinx +gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." .PHONY: changes -changes: sphinx +changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck -linkcheck: sphinx +linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest -doctest: sphinx +doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage -coverage: sphinx +coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml -xml: sphinx +xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml -pseudoxml: sphinx +pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy -dummy: sphinx +dummy: $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." -