Skip to content

Commit

Permalink
adding serialization/deserialization
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Nov 22, 2023
1 parent df21d49 commit 79218be
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 0 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,21 @@ about 0.0015%. The type is `binary_fuse16_t` and you may use it with
functions such as `binary_fuse16_allocate`, `binary_fuse16_populate`,
`binary_fuse16_contain` and `binary_fuse16_free`.
You may serialize the data as follows:
```C
size_t buffer_size = binary_fuse16_serialization_bytes(&filter);
char *buffer = (char*)malloc(buffer_size);
binary_fuse16_serialize(&filter, buffer);
binary_fuse16_free(&filter);
binary_fuse16_deserialize(&filter, buffer);
free(buffer);
```

The serialization does not handle endianness: the data is written in the native byte order, so it is expected
that you will serialize and deserialize on systems with the same endianness, in practice little-endian systems. (Big-endian systems are vanishingly rare.)


## C++ wrapper

If you want a C++ version, you can roll your own:
Expand Down
96 changes: 96 additions & 0 deletions include/binaryfusefilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,103 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size,
return true;
}

static inline size_t binary_fuse16_serialization_bytes(binary_fuse16_t *filter) {
return sizeof(filter->Seed) + sizeof(filter->SegmentLength) +
sizeof(filter->SegmentLengthMask) + sizeof(filter->SegmentCount) +
sizeof(filter->SegmentCountLength) + sizeof(filter->ArrayLength) +
sizeof(uint16_t) * filter->ArrayLength;
}

// Size in bytes of the serialized form of an 8-bit binary fuse filter:
// a fixed-size header (Seed, SegmentLength, SegmentCount, SegmentCountLength,
// ArrayLength) followed by one byte per fingerprint.
static inline size_t binary_fuse8_serialization_bytes(const binary_fuse8_t *filter) {
  const size_t header_bytes = sizeof(filter->Seed) +
                              sizeof(filter->SegmentLength) +
                              sizeof(filter->SegmentCount) +
                              sizeof(filter->SegmentCountLength) +
                              sizeof(filter->ArrayLength);
  const size_t fingerprint_bytes = sizeof(uint8_t) * filter->ArrayLength;
  return header_bytes + fingerprint_bytes;
}

// Serialize a filter to a buffer; the buffer must have a capacity of at least
// binary_fuse16_serialization_bytes(filter) bytes.
// Layout, in order: Seed, SegmentLength, SegmentCount, SegmentCountLength,
// ArrayLength, then ArrayLength 16-bit fingerprints. SegmentLengthMask is not
// written because it can be derived from SegmentLength.
// Native endianness only.
static inline void binary_fuse16_serialize(const binary_fuse16_t *filter, char *buffer) {
  memcpy(buffer, &filter->Seed, sizeof(filter->Seed));
  buffer += sizeof(filter->Seed);
  memcpy(buffer, &filter->SegmentLength, sizeof(filter->SegmentLength));
  buffer += sizeof(filter->SegmentLength);
  memcpy(buffer, &filter->SegmentCount, sizeof(filter->SegmentCount));
  buffer += sizeof(filter->SegmentCount);
  memcpy(buffer, &filter->SegmentCountLength, sizeof(filter->SegmentCountLength));
  buffer += sizeof(filter->SegmentCountLength);
  memcpy(buffer, &filter->ArrayLength, sizeof(filter->ArrayLength));
  buffer += sizeof(filter->ArrayLength);
  // Copy every fingerprint: the element count is the value of ArrayLength,
  // not sizeof(filter->ArrayLength), which is only the width of the field.
  memcpy(buffer, filter->Fingerprints, (size_t)filter->ArrayLength * sizeof(uint16_t));
}

// Serialize a filter to a buffer; the buffer must have a capacity of at least
// binary_fuse8_serialization_bytes(filter) bytes.
// Layout, in order: Seed, SegmentLength, SegmentCount, SegmentCountLength,
// ArrayLength, then ArrayLength 8-bit fingerprints. SegmentLengthMask is not
// written because it can be derived from SegmentLength.
// Native endianness only.
static inline void binary_fuse8_serialize(const binary_fuse8_t *filter, char *buffer) {
  memcpy(buffer, &filter->Seed, sizeof(filter->Seed));
  buffer += sizeof(filter->Seed);
  memcpy(buffer, &filter->SegmentLength, sizeof(filter->SegmentLength));
  buffer += sizeof(filter->SegmentLength);
  memcpy(buffer, &filter->SegmentCount, sizeof(filter->SegmentCount));
  buffer += sizeof(filter->SegmentCount);
  memcpy(buffer, &filter->SegmentCountLength, sizeof(filter->SegmentCountLength));
  buffer += sizeof(filter->SegmentCountLength);
  memcpy(buffer, &filter->ArrayLength, sizeof(filter->ArrayLength));
  buffer += sizeof(filter->ArrayLength);
  // Copy every fingerprint: the element count is the value of ArrayLength,
  // not sizeof(filter->ArrayLength), which is only the width of the field.
  memcpy(buffer, filter->Fingerprints, (size_t)filter->ArrayLength * sizeof(uint8_t));
}

// Deserialize a filter from a buffer; returns true on success, false on failure.
// The fingerprint array is freshly allocated with malloc, so the caller should
// call binary_fuse16_free(filter) before if the filter was already allocated,
// and needs to call binary_fuse16_free(filter) after.
// Reads the header fields (Seed, SegmentLength, SegmentCount,
// SegmentCountLength, ArrayLength) followed by ArrayLength 16-bit fingerprints,
// i.e. at most binary_fuse16_serialization_bytes(filter) bytes.
// If the allocation fails, false is returned; the header fields of the filter
// have already been overwritten at that point and Fingerprints is NULL.
// Native endianness only.
static inline bool binary_fuse16_deserialize(binary_fuse16_t * filter, const char *buffer) {
  memcpy(&filter->Seed, buffer, sizeof(filter->Seed));
  buffer += sizeof(filter->Seed);
  memcpy(&filter->SegmentLength, buffer, sizeof(filter->SegmentLength));
  buffer += sizeof(filter->SegmentLength);
  // The mask is not stored in the buffer; it is rebuilt from SegmentLength.
  filter->SegmentLengthMask = filter->SegmentLength - 1;
  memcpy(&filter->SegmentCount, buffer, sizeof(filter->SegmentCount));
  buffer += sizeof(filter->SegmentCount);
  memcpy(&filter->SegmentCountLength, buffer, sizeof(filter->SegmentCountLength));
  buffer += sizeof(filter->SegmentCountLength);
  memcpy(&filter->ArrayLength, buffer, sizeof(filter->ArrayLength));
  buffer += sizeof(filter->ArrayLength);
  const size_t fingerprint_bytes = (size_t)filter->ArrayLength * sizeof(uint16_t);
  filter->Fingerprints = (uint16_t*)malloc(fingerprint_bytes);
  if(filter->Fingerprints == NULL) {
    return false;
  }
  // Fill the whole array: the byte count derives from the value of
  // ArrayLength, not from sizeof(filter->ArrayLength).
  memcpy(filter->Fingerprints, buffer, fingerprint_bytes);
  return true;
}


// Deserialize a filter from a buffer; returns true on success, false on failure.
// The fingerprint array is freshly allocated with malloc, so the caller should
// call binary_fuse8_free(filter) before if the filter was already allocated,
// and needs to call binary_fuse8_free(filter) after.
// Reads the header fields (Seed, SegmentLength, SegmentCount,
// SegmentCountLength, ArrayLength) followed by ArrayLength 8-bit fingerprints,
// i.e. binary_fuse8_serialization_bytes(filter) bytes.
// If the allocation fails, false is returned; the header fields of the filter
// have already been overwritten at that point and Fingerprints is NULL.
// Native endianness only.
static inline bool binary_fuse8_deserialize(binary_fuse8_t * filter, const char *buffer) {
  memcpy(&filter->Seed, buffer, sizeof(filter->Seed));
  buffer += sizeof(filter->Seed);
  memcpy(&filter->SegmentLength, buffer, sizeof(filter->SegmentLength));
  buffer += sizeof(filter->SegmentLength);
  // The mask is not stored in the buffer; it is rebuilt from SegmentLength.
  filter->SegmentLengthMask = filter->SegmentLength - 1;
  memcpy(&filter->SegmentCount, buffer, sizeof(filter->SegmentCount));
  buffer += sizeof(filter->SegmentCount);
  memcpy(&filter->SegmentCountLength, buffer, sizeof(filter->SegmentCountLength));
  buffer += sizeof(filter->SegmentCountLength);
  memcpy(&filter->ArrayLength, buffer, sizeof(filter->ArrayLength));
  buffer += sizeof(filter->ArrayLength);
  const size_t fingerprint_bytes = (size_t)filter->ArrayLength * sizeof(uint8_t);
  filter->Fingerprints = (uint8_t*)malloc(fingerprint_bytes);
  if(filter->Fingerprints == NULL) {
    return false;
  }
  // Fill the whole array: the byte count derives from the value of
  // ArrayLength, not from sizeof(filter->ArrayLength).
  memcpy(filter->Fingerprints, buffer, fingerprint_bytes);
  return true;
}

#endif
72 changes: 72 additions & 0 deletions include/xorfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1280,4 +1280,76 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter



uint64_t seed;
uint64_t blockLength;
static inline size_t xor16_serialization_bytes(xor16_t *filter) {
return sizeof(filter->seed) + sizeof(filter->blockLength) +
sizeof(uint16_t) * 3 * filter->blockLength;
}

// Size in bytes of the serialized form of an 8-bit xor filter: the seed and
// blockLength header fields followed by 3 * blockLength one-byte fingerprints.
static inline size_t xor8_serialization_bytes(const xor8_t *filter) {
  const size_t header_bytes = sizeof(filter->seed) + sizeof(filter->blockLength);
  const size_t fingerprint_bytes = sizeof(uint8_t) * 3 * filter->blockLength;
  return header_bytes + fingerprint_bytes;
}

// Serialize a filter to a buffer; the buffer must have a capacity of at least
// xor16_serialization_bytes(filter) bytes.
// Layout, in order: seed, blockLength, then 3 * blockLength 16-bit fingerprints.
// Native endianness only.
static inline void xor16_serialize(const xor16_t *filter, char *buffer) {
  memcpy(buffer, &filter->seed, sizeof(filter->seed));
  buffer += sizeof(filter->seed);
  memcpy(buffer, &filter->blockLength, sizeof(filter->blockLength));
  buffer += sizeof(filter->blockLength);
  // Copy every fingerprint: the element count is 3 * the value of blockLength,
  // not sizeof(filter->blockLength), which is only the width of the field.
  memcpy(buffer, filter->fingerprints, filter->blockLength * 3 * sizeof(uint16_t));
}

// Serialize a filter to a buffer; the buffer must have a capacity of at least
// xor8_serialization_bytes(filter) bytes.
// Layout, in order: seed, blockLength, then 3 * blockLength 8-bit fingerprints.
// Native endianness only.
static inline void xor8_serialize(const xor8_t *filter, char *buffer) {
  memcpy(buffer, &filter->seed, sizeof(filter->seed));
  buffer += sizeof(filter->seed);
  memcpy(buffer, &filter->blockLength, sizeof(filter->blockLength));
  buffer += sizeof(filter->blockLength);
  // Copy every fingerprint: the element count is 3 * the value of blockLength,
  // not sizeof(filter->blockLength), which is only the width of the field.
  memcpy(buffer, filter->fingerprints, filter->blockLength * 3 * sizeof(uint8_t));
}

// Deserialize a filter from a buffer; returns true on success, false on failure.
// The fingerprint array is freshly allocated with malloc, so the caller should
// call xor16_free(filter) before if the filter was already allocated, and
// needs to call xor16_free(filter) after.
// Reads seed and blockLength followed by 3 * blockLength 16-bit fingerprints,
// i.e. xor16_serialization_bytes(filter) bytes.
// If the allocation fails, false is returned; seed and blockLength have
// already been overwritten at that point and fingerprints is NULL.
// Native endianness only.
static inline bool xor16_deserialize(xor16_t * filter, const char *buffer) {
  memcpy(&filter->seed, buffer, sizeof(filter->seed));
  buffer += sizeof(filter->seed);
  memcpy(&filter->blockLength, buffer, sizeof(filter->blockLength));
  buffer += sizeof(filter->blockLength);
  const size_t fingerprint_bytes = filter->blockLength * 3 * sizeof(uint16_t);
  filter->fingerprints = (uint16_t*)malloc(fingerprint_bytes);
  if(filter->fingerprints == NULL) {
    return false;
  }
  // Fill the whole array: the byte count derives from the value of
  // blockLength, not from sizeof(filter->blockLength).
  memcpy(filter->fingerprints, buffer, fingerprint_bytes);
  return true;
}


// Deserialize a filter from a buffer; returns true on success, false on failure.
// The fingerprint array is freshly allocated with malloc, so the caller should
// call xor8_free(filter) before if the filter was already allocated, and
// needs to call xor8_free(filter) after.
// Reads seed and blockLength followed by 3 * blockLength 8-bit fingerprints,
// i.e. xor8_serialization_bytes(filter) bytes.
// If the allocation fails, false is returned; seed and blockLength have
// already been overwritten at that point and fingerprints is NULL.
// Native endianness only.
static inline bool xor8_deserialize(xor8_t * filter, const char *buffer) {
  memcpy(&filter->seed, buffer, sizeof(filter->seed));
  buffer += sizeof(filter->seed);
  memcpy(&filter->blockLength, buffer, sizeof(filter->blockLength));
  buffer += sizeof(filter->blockLength);
  const size_t fingerprint_bytes = filter->blockLength * 3 * sizeof(uint8_t);
  filter->fingerprints = (uint8_t*)malloc(fingerprint_bytes);
  if(filter->fingerprints == NULL) {
    return false;
  }
  // Fill the whole array: the byte count derives from the value of
  // blockLength, not from sizeof(filter->blockLength).
  memcpy(filter->fingerprints, buffer, fingerprint_bytes);
  return true;
}


#endif
35 changes: 35 additions & 0 deletions tests/unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ bool testxor8(size_t size) {
}
}

size_t buffer_size = xor8_serialization_bytes(&filter);
char *buffer = (char*)malloc(buffer_size);
xor8_serialize(&filter, buffer);
xor8_free(&filter);
xor8_deserialize(&filter, buffer);
free(buffer);

size_t random_matches = 0;
size_t trials = 10000000;
for (size_t i = 0; i < trials; i++) {
Expand Down Expand Up @@ -102,6 +109,13 @@ bool testxor16(size_t size) {
}
}

size_t buffer_size = xor16_serialization_bytes(&filter);
char *buffer = (char*)malloc(buffer_size);
xor16_serialize(&filter, buffer);
xor16_free(&filter);
xor16_deserialize(&filter, buffer);
free(buffer);

size_t random_matches = 0;
size_t trials = 10000000;
for (size_t i = 0; i < trials; i++) {
Expand Down Expand Up @@ -142,6 +156,13 @@ bool testbufferedxor16(size_t size) {
}
}

size_t buffer_size = xor16_serialization_bytes(&filter);
char *buffer = (char*)malloc(buffer_size);
xor16_serialize(&filter, buffer);
xor16_free(&filter);
xor16_deserialize(&filter, buffer);
free(buffer);

size_t random_matches = 0;
size_t trials = 10000000;
for (size_t i = 0; i < trials; i++) {
Expand Down Expand Up @@ -181,6 +202,13 @@ bool testbinaryfuse8(size_t size) {
}
}

size_t buffer_size = binary_fuse8_serialization_bytes(&filter);
char *buffer = (char*)malloc(buffer_size);
binary_fuse8_serialize(&filter, buffer);
binary_fuse8_free(&filter);
binary_fuse8_deserialize(&filter, buffer);
free(buffer);

size_t random_matches = 0;
size_t trials = 10000000;
for (size_t i = 0; i < trials; i++) {
Expand Down Expand Up @@ -222,6 +250,13 @@ bool testbinaryfuse16(size_t size) {
}
}

size_t buffer_size = binary_fuse16_serialization_bytes(&filter);
char *buffer = (char*)malloc(buffer_size);
binary_fuse16_serialize(&filter, buffer);
binary_fuse16_free(&filter);
binary_fuse16_deserialize(&filter, buffer);
free(buffer);

size_t random_matches = 0;
size_t trials = 10000000;
for (size_t i = 0; i < trials; i++) {
Expand Down

0 comments on commit 79218be

Please sign in to comment.