diff --git a/README.md b/README.md index 89123a6..1cdfbdb 100644 --- a/README.md +++ b/README.md @@ -19,16 +19,36 @@ More details on [this Wikepedia article](https://en.wikipedia.org/wiki/Fowler%E2 ## Features -- speed: **4 000%** faster than basic Python implementation, **40%** faster than `pyhash` +- speed: + - up to **800 MB/s** hashing speed *(on a MacBook Pro M1 (2020) with `fnv0_64`)* + - **6 800%** faster than a basic Python implementation, **70%** faster than `pyhash` *(when hashing 100 bytes with `fnv0_64` on a cloud VM)* - portability: - tested with recent Python versions (3.7+) - compatible with ARM64 - - compatible with PyPy + - compatible (and tested) with PyPy ## Non features -- other hash algorithms (this library is only about FNV algorithm) -- too agressive CPU optimizations (we prefer maximizing binary portability) +- other hash algorithms *(this library is only about the FNV algorithm)* +- overly aggressive CPU optimizations *(we prefer maximizing binary portability)* + +## Benchmark + +A benchmark script is available [here](bench.py) so that you can benchmark `fnv-c` yourself and compare it with: +- [`fnvhash`](https://github.com/znerol/py-fnvhash) (pure Python implementation) +- [`pyhash`](https://github.com/flier/pyfasthash) (more general hashing library with C++ extension) + +### Comparisons with other libraries (`fnv0_64` on a cloud VM) + +The difference with `fnvhash` is huge (from **35%** when hashing a single byte to **19 000%** when hashing 1 000 bytes with `fnv0_64`). + +Differences with `pyhash` (on `fnv0_64`) are shown in the following diagram: + +![](bench.png) + +### Influence of string size on `fnv-c` hashing speed (on a MacBook Pro M1 (2020) with `fnv0_64`) + +![](bench2.png) ## How to install/use it? 
diff --git a/bench.png b/bench.png new file mode 100644 index 0000000..222cbfd Binary files /dev/null and b/bench.png differ diff --git a/bench.py b/bench.py index 471878a..99a859c 100644 --- a/bench.py +++ b/bench.py @@ -1,18 +1,30 @@ +import platform import timeit -SIZES = (1, 10, 100, 1000) -STMTS = ("fnv0_32", "fnv0_64", "fnv1_32", "fnv1_64", "fnv1a_32", "fnv1a_64") +BENCH_PYHASH = True +BENCH_FNVHASH = True -TOBENCHS = [ - { - "project": "fnv_c", - "import": "fnv_c", - }, - { - "project": "fnvhash", - "import": "fnvhash", - }, -] +SIZES = (1, 5, 10, 50, 100, 500, 1000) + +if BENCH_PYHASH and platform.machine() == "arm64": + print("WARNING: pyhash is not compatible with ARM64") + BENCH_PYHASH = False + +if BENCH_FNVHASH: + try: + import fnvhash  # noqa: F401 + except ImportError: + print("WARNING: fnvhash is not installed") + print("=> do 'pip install fnvhash'") + BENCH_FNVHASH = False + +if BENCH_PYHASH: + try: + import pyhash  # noqa: F401 + except ImportError: + print("WARNING: pyhash is not installed") + print("=> do 'pip install pyhash'") + BENCH_PYHASH = False def bench(imprt, hasher): @@ -36,11 +48,13 @@ def bench(imprt, hasher): return timeit.timeit(stmt="hasher(b)", setup=setup) -for stmt in STMTS: - for size in SIZES: - print(f">>> Benchmarking (1M times) {stmt} on bytes of size {size}...") - print("") - for tobench in TOBENCHS: - print(f"{tobench['project']:10}", bench(tobench["import"], stmt)) - print("") - print("") +for size in SIZES: + print(f">>> Benchmarking (1M times) on bytes of size {size}...") + print("") + print("fnv0_64 (fnv-c) ", bench("fnv_c", "fnv0_64")) + if BENCH_PYHASH: + print("fnv1_64 (pyhash) ", bench("pyhash", "fnv1_64()")) + if BENCH_FNVHASH: + print("fnv0_64 (fnvhash) ", bench("fnvhash", "fnv0_64")) + print("") + print("") diff --git a/bench2.png b/bench2.png new file mode 100644 index 0000000..8fbe805 Binary files /dev/null and b/bench2.png differ