From 941f4609624725e7c3614b737f3d9999a4ac9e54 Mon Sep 17 00:00:00 2001
From: Alexey Pismenskiy
Date: Wed, 7 Feb 2024 17:18:59 -0700
Subject: [PATCH] Serialize and compress hyperscan DB

---
 pom.xml                                       |  5 +++
 .../gliwka/hyperscan/wrapper/Database.java    | 42 +++++++++++++++++++
 .../hyperscan/wrapper/DatabaseTest.java       | 33 +++++++++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 src/test/java/com/gliwka/hyperscan/wrapper/DatabaseTest.java

diff --git a/pom.xml b/pom.xml
index eb917d3..0a6847a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -216,6 +216,11 @@
             <artifactId>javacpp</artifactId>
             <version>1.5.9</version>
         </dependency>
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>1.16.0</version>
+        </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-api</artifactId>
diff --git a/src/main/java/com/gliwka/hyperscan/wrapper/Database.java b/src/main/java/com/gliwka/hyperscan/wrapper/Database.java
index 07368e5..0b76318 100644
--- a/src/main/java/com/gliwka/hyperscan/wrapper/Database.java
+++ b/src/main/java/com/gliwka/hyperscan/wrapper/Database.java
@@ -2,6 +2,8 @@
 
 import com.gliwka.hyperscan.jni.hs_compile_error_t;
 import com.gliwka.hyperscan.jni.hs_database_t;
+import org.apache.commons.codec.binary.Base64InputStream;
+import org.apache.commons.codec.binary.Base64OutputStream;
 import org.bytedeco.javacpp.*;
 
 import java.io.*;
@@ -10,6 +12,8 @@
 import java.nio.charset.StandardCharsets;
 import java.util.*;
 import java.util.stream.Collectors;
+import java.util.zip.DeflaterOutputStream;
+import java.util.zip.InflaterInputStream;
 
 import static com.gliwka.hyperscan.jni.hyperscan.*;
 import static java.util.Collections.singletonList;
@@ -153,6 +157,44 @@ public void close() {
         database = null;
     }
 
+    /**
+     * Serializes this database, together with its expressions, into a
+     * zlib-compressed, Base64-encoded string.
+     * The result can be turned back into a database using {@link #deserialize(String)}.
+     *
+     * @return serialized database
+     * @throws IOException if writing the database fails
+     */
+    public String serialize() throws IOException {
+        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+        try (
+            Base64OutputStream base64OutputStream = new Base64OutputStream(byteArrayOutputStream, true);
+            DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream(base64OutputStream)
+        ) {
+            save(deflaterOutputStream);
+        }
+        // Read the buffer only after both streams are closed: the Base64 encoder holds back
+        // an incomplete trailing 3-byte group (and the padding) until it is closed.
+        return new String(byteArrayOutputStream.toByteArray(), StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Creates a database from a Base64-encoded string created by {@link #serialize()}.
+     *
+     * @param input serialized database
+     * @return database
+     * @throws IOException if reading the serialized data fails
+     */
+    public static Database deserialize(String input) throws IOException {
+        try (
+            InputStream byteArrayInputStream = new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
+            Base64InputStream base64InputStream = new Base64InputStream(byteArrayInputStream, false);
+            InputStream inflaterInputStream = new InflaterInputStream(base64InputStream)
+        ) {
+            return load(inflaterInputStream);
+        }
+    }
+
     /**
      * Saves the expressions and the compiled database to an OutputStream.
      * Expression contexts are not saved.
diff --git a/src/test/java/com/gliwka/hyperscan/wrapper/DatabaseTest.java b/src/test/java/com/gliwka/hyperscan/wrapper/DatabaseTest.java
new file mode 100644
index 0000000..6dfff24
--- /dev/null
+++ b/src/test/java/com/gliwka/hyperscan/wrapper/DatabaseTest.java
@@ -0,0 +1,33 @@
+package com.gliwka.hyperscan.wrapper;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.EnumSet;
+import java.util.LinkedList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class DatabaseTest {
+
+    @Test
+    void serializationTest() throws Exception {
+        LinkedList<Expression> expressions = new LinkedList<>();
+        expressions.add(new Expression("[0-9]{5}", EnumSet.of(ExpressionFlag.SOM_LEFTMOST)));
+        expressions.add(new Expression("Test", EnumSet.of(ExpressionFlag.CASELESS)));
+        try (
+            Database originalDb = Database.compile(expressions);
+            Scanner originalScanner = new Scanner();
+            Database deserializedDb = Database.deserialize(originalDb.serialize());
+            Scanner deserializedScanner = new Scanner();
+        ) {
+            originalScanner.allocScratch(originalDb);
+            List<Match> matches = originalScanner.scan(originalDb, "Test 12345");
+            assertEquals(2, matches.size());
+
+            deserializedScanner.allocScratch(deserializedDb);
+            List<Match> matchesFromSerialized = deserializedScanner.scan(deserializedDb, "Test 12345");
+            assertEquals(2, matchesFromSerialized.size());
+        }
+    }
+}