Skip to content

Commit

Permalink
Serialize and compress hyperscan DB
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Pismenskiy committed Feb 8, 2024
1 parent 3c0e478 commit 941f460
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,11 @@
<artifactId>javacpp</artifactId>
<version>1.5.9</version>
</dependency>
<dependency>
<!-- Supplies the Base64InputStream / Base64OutputStream classes imported by Database.java -->
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.16.0</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
Expand Down
37 changes: 37 additions & 0 deletions src/main/java/com/gliwka/hyperscan/wrapper/Database.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.gliwka.hyperscan.jni.hs_compile_error_t;
import com.gliwka.hyperscan.jni.hs_database_t;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.bytedeco.javacpp.*;

import java.io.*;
Expand All @@ -10,6 +12,8 @@
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

import static com.gliwka.hyperscan.jni.hyperscan.*;
import static java.util.Collections.singletonList;
Expand Down Expand Up @@ -153,6 +157,39 @@ public void close() {
database = null;
}

/**
 * Serializes this database into a compressed, BASE64 encoded string.
 * <p>
 * The bytes written by {@code save} are deflated and then BASE64 encoded.
 * The result can be turned back into a database with {@link #deserialize(String)}.
 *
 * @return serialized database
 * @throws IOException if writing the database to the underlying streams fails
 */
public String serialize() throws IOException {
    // The buffer lives outside the try-with-resources on purpose: it must only be
    // read AFTER the encoder chain has been closed (see below).
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (
        Base64OutputStream base64OutputStream = new Base64OutputStream(buffer, true);
        DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream(base64OutputStream)
    ) {
        save(deflaterOutputStream);
        // No explicit finish() here: closing the DeflaterOutputStream finishes the
        // compressed stream and then closes the Base64OutputStream, which is what
        // emits the final (possibly padded) BASE64 group. Reading the buffer before
        // that close can drop the last bytes of the compressed payload.
    }
    return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
}

/**
 * Restores a database from the string representation produced by {@link #serialize()}.
 * <p>
 * The input is BASE64 decoded, inflated and then handed to {@code load}.
 *
 * @param input serialized database
 * @return database
 * @throws IOException if decoding, inflating or loading the database fails
 */
public static Database deserialize(String input) throws IOException {
    byte[] encoded = input.getBytes(StandardCharsets.UTF_8);
    try (
        InputStream raw = new ByteArrayInputStream(encoded);
        InputStream decoded = new Base64InputStream(raw, false);
        InputStream inflated = new InflaterInputStream(decoded)
    ) {
        return load(inflated);
    }
}

/**
* Saves the expressions and the compiled database to an OutputStream.
* Expression contexts are not saved.
Expand Down
33 changes: 33 additions & 0 deletions src/test/java/com/gliwka/hyperscan/wrapper/DatabaseTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.gliwka.hyperscan.wrapper;

import org.junit.jupiter.api.Test;

import java.util.EnumSet;
import java.util.LinkedList;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for the string serialization round trip of {@code Database}.
 */
public class DatabaseTest {

    /**
     * Compiles two expressions, serializes the resulting database to a string,
     * deserializes it again and checks that both the original and the restored
     * database report two matches for the same input.
     */
    @Test
    void serializationTest() throws Exception {
        final String text = "Test 12345";

        LinkedList<Expression> patterns = new LinkedList<>();
        patterns.add(new Expression("[0-9]{5}", EnumSet.of(ExpressionFlag.SOM_LEFTMOST)));
        patterns.add(new Expression("Test", EnumSet.of(ExpressionFlag.CASELESS)));

        try (Database compiled = Database.compile(patterns);
             Scanner compiledScanner = new Scanner();
             Database restored = Database.deserialize(compiled.serialize());
             Scanner restoredScanner = new Scanner()) {

            // Baseline: scan with the freshly compiled database.
            compiledScanner.allocScratch(compiled);
            List<Match> baselineMatches = compiledScanner.scan(compiled, text);
            assertEquals(2, baselineMatches.size());

            // Same scan against the database restored from its serialized form.
            restoredScanner.allocScratch(restored);
            List<Match> restoredMatches = restoredScanner.scan(restored, text);
            assertEquals(2, restoredMatches.size());
        }
    }
}

0 comments on commit 941f460

Please sign in to comment.