From 28c5373f995c6c50f66e8faed8b4bca2968c132d Mon Sep 17 00:00:00 2001
From: Lukas Sismis <lsismis@oisf.net>
Date: Thu, 25 Jul 2024 10:34:21 +0200
Subject: [PATCH 1/4] hashlittle: add a safe variant of hashlittle2 function

This variant of hashlittle2() ensures that it avoids
accesses beyond the last byte of the string, which will
cause warnings from tools like Valgrind or Address
Sanitizer.
---
 src/util-hash-lookup3.c | 204 ++++++++++++++++++++++++++++++++++++++++
 src/util-hash-lookup3.h |   5 +
 2 files changed, 209 insertions(+)

diff --git a/src/util-hash-lookup3.c b/src/util-hash-lookup3.c
index 2354c183d219..c2b0fe275828 100644
--- a/src/util-hash-lookup3.c
+++ b/src/util-hash-lookup3.c
@@ -805,7 +805,211 @@ void hashlittle2(
   *pc=c; *pb=b;
 }
 
+/*
+ * hashlittle2: return 2 32-bit hash values
+ *
+ * This is identical to hashlittle(), except it returns two 32-bit hash
+ * values instead of just one.  This is good enough for hash table
+ * lookup with 2^^64 buckets, or if you want a second hash if you're not
+ * happy with the first, or if you want a probably-unique 64-bit ID for
+ * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
+ * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
+ */
+void hashlittle2_safe(const void *key, /* the key to hash */
+        size_t length,                 /* length of the key */
+        uint32_t *pc,                  /* IN: primary initval, OUT: primary hash */
+        uint32_t *pb)                  /* IN: secondary initval, OUT: secondary hash */
+{
+    uint32_t a, b, c; /* internal state */
+    union {
+        const void *ptr;
+        size_t i;
+    } u; /* needed for Mac Powerbook G4 */
+
+    /* Set up the internal state */
+    a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
+    c += *pb;
+
+    u.ptr = key;
+    if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+        const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+        /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+        while (length > 12) {
+            a += k[0];
+            b += k[1];
+            c += k[2];
+            mix(a, b, c);
+            length -= 12;
+            k += 3;
+        }
+
+        /*----------------------------- handle the last (probably partial) block */
+        /*
+         * Note that unlike hashlittle() above, we use the "safe" version of this
+         * block that is #ifdef VALGRIND above, in order to avoid warnings from
+         * Valgrind or Address Sanitizer.
+         */
+        const uint8_t *k8 = (const uint8_t *)k;
+        switch (length) {
+            case 12:
+                c += k[2];
+                b += k[1];
+                a += k[0];
+                break;
+            case 11:
+                c += ((uint32_t)k8[10]) << 16; /* fall through */
+            case 10:
+                c += ((uint32_t)k8[9]) << 8; /* fall through */
+            case 9:
+                c += k8[8]; /* fall through */
+            case 8:
+                b += k[1];
+                a += k[0];
+                break;
+            case 7:
+                b += ((uint32_t)k8[6]) << 16; /* fall through */
+            case 6:
+                b += ((uint32_t)k8[5]) << 8; /* fall through */
+            case 5:
+                b += k8[4]; /* fall through */
+            case 4:
+                a += k[0];
+                break;
+            case 3:
+                a += ((uint32_t)k8[2]) << 16; /* fall through */
+            case 2:
+                a += ((uint32_t)k8[1]) << 8; /* fall through */
+            case 1:
+                a += k8[0];
+                break;
+            case 0:
+                *pc = c;
+                *pb = b;
+                return; /* zero length strings require no mixing */
+        }
+
+    } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+        const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+        const uint8_t *k8;
+
+        /*--------------- all but last block: aligned reads and different mixing */
+        while (length > 12) {
+            a += k[0] + (((uint32_t)k[1]) << 16);
+            b += k[2] + (((uint32_t)k[3]) << 16);
+            c += k[4] + (((uint32_t)k[5]) << 16);
+            mix(a, b, c);
+            length -= 12;
+            k += 6;
+        }
+
+        /*----------------------------- handle the last (probably partial) block */
+        k8 = (const uint8_t *)k;
+        switch (length) {
+            case 12:
+                c += k[4] + (((uint32_t)k[5]) << 16);
+                b += k[2] + (((uint32_t)k[3]) << 16);
+                a += k[0] + (((uint32_t)k[1]) << 16);
+                break;
+            case 11:
+                c += ((uint32_t)k8[10]) << 16; /* fall through */
+            case 10:
+                c += k[4];
+                b += k[2] + (((uint32_t)k[3]) << 16);
+                a += k[0] + (((uint32_t)k[1]) << 16);
+                break;
+            case 9:
+                c += k8[8]; /* fall through */
+            case 8:
+                b += k[2] + (((uint32_t)k[3]) << 16);
+                a += k[0] + (((uint32_t)k[1]) << 16);
+                break;
+            case 7:
+                b += ((uint32_t)k8[6]) << 16; /* fall through */
+            case 6:
+                b += k[2];
+                a += k[0] + (((uint32_t)k[1]) << 16);
+                break;
+            case 5:
+                b += k8[4]; /* fall through */
+            case 4:
+                a += k[0] + (((uint32_t)k[1]) << 16);
+                break;
+            case 3:
+                a += ((uint32_t)k8[2]) << 16; /* fall through */
+            case 2:
+                a += k[0];
+                break;
+            case 1:
+                a += k8[0];
+                break;
+            case 0:
+                *pc = c;
+                *pb = b;
+                return; /* zero length strings require no mixing */
+        }
+
+    } else { /* need to read the key one byte at a time */
+        const uint8_t *k = (const uint8_t *)key;
+
+        /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+        while (length > 12) {
+            a += k[0];
+            a += ((uint32_t)k[1]) << 8;
+            a += ((uint32_t)k[2]) << 16;
+            a += ((uint32_t)k[3]) << 24;
+            b += k[4];
+            b += ((uint32_t)k[5]) << 8;
+            b += ((uint32_t)k[6]) << 16;
+            b += ((uint32_t)k[7]) << 24;
+            c += k[8];
+            c += ((uint32_t)k[9]) << 8;
+            c += ((uint32_t)k[10]) << 16;
+            c += ((uint32_t)k[11]) << 24;
+            mix(a, b, c);
+            length -= 12;
+            k += 12;
+        }
+
+        /*-------------------------------- last block: affect all 32 bits of (c) */
+        switch (length) /* all the case statements fall through */
+        {
+            case 12:
+                c += ((uint32_t)k[11]) << 24; /* fall through */
+            case 11:
+                c += ((uint32_t)k[10]) << 16; /* fall through */
+            case 10:
+                c += ((uint32_t)k[9]) << 8; /* fall through */
+            case 9:
+                c += k[8]; /* fall through */
+            case 8:
+                b += ((uint32_t)k[7]) << 24; /* fall through */
+            case 7:
+                b += ((uint32_t)k[6]) << 16; /* fall through */
+            case 6:
+                b += ((uint32_t)k[5]) << 8; /* fall through */
+            case 5:
+                b += k[4]; /* fall through */
+            case 4:
+                a += ((uint32_t)k[3]) << 24; /* fall through */
+            case 3:
+                a += ((uint32_t)k[2]) << 16; /* fall through */
+            case 2:
+                a += ((uint32_t)k[1]) << 8; /* fall through */
+            case 1:
+                a += k[0];
+                break;
+            case 0:
+                *pc = c;
+                *pb = b;
+                return; /* zero length strings require no mixing */
+        }
+    }
 
+    final(a, b, c);
+    *pc = c;
+    *pb = b;
+}
 
 /*
  * hashbig():
diff --git a/src/util-hash-lookup3.h b/src/util-hash-lookup3.h
index 62d3d14270b1..ab7f6d3d88f3 100644
--- a/src/util-hash-lookup3.h
+++ b/src/util-hash-lookup3.h
@@ -62,6 +62,11 @@ void hashlittle2(const void *key,       /* the key to hash */
                  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
                  uint32_t   *pb);       /* IN: secondary initval, OUT: secondary hash */
 
+/* A variant of hashlittle2() that ensures avoids accesses beyond the last byte
+ * of the string, which will cause warnings from tools like Valgrind or Address
+ * Sanitizer. */
+void hashlittle2_safe(const void *key, size_t length, uint32_t *pc, uint32_t *pb);
+
 uint32_t hashbig( const void *key, size_t length, uint32_t initval);
 
 #endif /* SURICATA_UTIL_HASH_LOOKUP3_H */

From 4cb7265be80ec1b46139022a27ff1f7a72503693 Mon Sep 17 00:00:00 2001
From: Lukas Sismis <lsismis@oisf.net>
Date: Thu, 11 Jul 2024 21:04:26 +0200
Subject: [PATCH 2/4] hyperscan: add caching mechanism for hyperscan contexts

Cache Hyperscan serialized databases to disk to prevent compilation
of the same databases when Suricata is run again with the same
ruleset.
The current work operates in /tmp/ folder and caches individual
signature group heads - potentially the ruleset might be even
slightly changed and it still can reuse part of the unchanged
signature groups.
Loading *fresh* ET Open ruleset:  19 seconds
Loading *cached* ET Open ruleset: 07 seconds
---
 Makefile.am                             |   1 +
 configure.ac                            |   4 +
 doc/userguide/performance/hyperscan.rst |  26 ++-
 src/detect-engine.c                     |  17 ++
 src/detect-engine.h                     |   2 +
 src/util-mpm-hs.c                       | 256 ++++++++++++++++++++++++
 suricata.yaml.in                        |   4 +
 7 files changed, 309 insertions(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index 6e2f77cadc24..aaba7b479d1f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,6 +36,7 @@ install-conf:
 	install -d "$(DESTDIR)$(e_rundir)"
 	install -m 770 -d "$(DESTDIR)$(e_localstatedir)"
 	install -m 770 -d "$(DESTDIR)$(e_datadir)"
+	install -m 660 -d "$(DESTDIR)$(e_hscachedir)"
 
 install-rules:
 if INSTALL_SURICATA_UPDATE
diff --git a/configure.ac b/configure.ac
index 03ffadd89b27..8fdd624bd286 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2436,6 +2436,8 @@ if test "$WINDOWS_PATH" = "yes"; then
 
     e_sysconfdir="${e_winbase}\\\\"
     e_defaultruledir="$e_winbase\\\\rules\\\\"
+    e_hscachedir="$e_winbase\\\\cache\\\\hs\\\\"
+
     e_magic_file="$e_winbase\\\\magic.mgc"
     e_logdir="$e_winbase\\\\log"
     e_logfilesdir="$e_logdir\\\\files"
@@ -2457,6 +2459,7 @@ else
     EXPAND_VARIABLE(sysconfdir, e_sysconfdir, "/suricata/")
     EXPAND_VARIABLE(localstatedir, e_localstatedir, "/run/suricata")
     EXPAND_VARIABLE(datadir, e_datarulesdir, "/suricata/rules")
+    EXPAND_VARIABLE(localstatedir, e_hscachedir, "/lib/suricata/cache/hs")
     EXPAND_VARIABLE(localstatedir, e_datadir, "/lib/suricata/data")
     EXPAND_VARIABLE(localstatedir, e_defaultruledir, "/lib/suricata/rules")
 
@@ -2470,6 +2473,7 @@ AC_SUBST(e_logcertsdir)
 AC_SUBST(e_sysconfdir)
 AC_DEFINE_UNQUOTED([CONFIG_DIR],["$e_sysconfdir"],[Our CONFIG_DIR])
 AC_SUBST(e_localstatedir)
+AC_SUBST(e_hscachedir)
 AC_SUBST(e_datadir)
 AC_DEFINE_UNQUOTED([DATA_DIR],["$e_datadir"],[Our DATA_DIR])
 AC_SUBST(e_magic_file)
diff --git a/doc/userguide/performance/hyperscan.rst b/doc/userguide/performance/hyperscan.rst
index 055fa7f21b75..068226136005 100644
--- a/doc/userguide/performance/hyperscan.rst
+++ b/doc/userguide/performance/hyperscan.rst
@@ -81,4 +81,28 @@ if it is present on the system in case of the "auto" setting.
 
 
 If the current suricata installation does not have hyperscan
-support, refer to :ref:`installation`
\ No newline at end of file
+support, refer to :ref:`installation`
+
+Hyperscan caching
+~~~~~~~~~~~~~~~~~
+
+Upon startup, Hyperscan compiles and optimizes the ruleset into its own 
+internal structure. Suricata optimizes the startup process by saving 
+the Hyperscan internal structures to disk and loading them on the next start. 
+This prevents the recompilation of the ruleset and results in faster 
+initialization. If the ruleset is changed, new necessary cache files are 
+automatically created. 
+
+To enable this function, in `suricata.yaml` configure:
+
+::
+
+  # Cache MPM contexts to the disk to avoid rule compilation at the startup.
+  # Cache files are created in the default logging directory.
+  sgh-mpm-caching: yes
+  sgh-mpm-caching-path: /var/lib/suricata/cache/hs
+
+
+**Note**: 
+You might need create and adjust permissions to the default caching folder path, 
+especially if you are running Suricata as a non-root user.
diff --git a/src/detect-engine.c b/src/detect-engine.c
index 58b5c9967c8b..6502e9936a6f 100644
--- a/src/detect-engine.c
+++ b/src/detect-engine.c
@@ -2989,6 +2989,17 @@ static int DetectEngineCtxLoadConf(DetectEngineCtx *de_ctx)
     return 0;
 }
 
+bool DetectEngineMpmCachingEnabled(void)
+{
+    const char *strval = NULL;
+    if (ConfGet("detect.sgh-mpm-caching", &strval) != 1)
+        return false;
+
+    int sgh_mpm_caching = 0;
+    (void)ConfGetBool("detect.sgh-mpm-caching", &sgh_mpm_caching);
+    return (bool)sgh_mpm_caching;
+}
+
 /*
  * getting & (re)setting the internal sig i
  */
@@ -2997,6 +3008,12 @@ static int DetectEngineCtxLoadConf(DetectEngineCtx *de_ctx)
 //{
 //    return de_ctx->signum;
 //}
+const char *DetectEngineMpmCachingGetPath(void)
+{
+    const char *strval = NULL;
+    ConfGet("detect.sgh-mpm-caching-path", &strval);
+    return strval;
+}
 
 void DetectEngineResetMaxSigId(DetectEngineCtx *de_ctx)
 {
diff --git a/src/detect-engine.h b/src/detect-engine.h
index b0e443e1270a..b71b992b12f2 100644
--- a/src/detect-engine.h
+++ b/src/detect-engine.h
@@ -100,7 +100,9 @@ void *DetectThreadCtxGetGlobalKeywordThreadCtx(DetectEngineThreadCtx *det_ctx, i
 
 TmEcode DetectEngineThreadCtxInit(ThreadVars *, void *, void **);
 TmEcode DetectEngineThreadCtxDeinit(ThreadVars *, void *);
+bool DetectEngineMpmCachingEnabled(void);
 //inline uint32_t DetectEngineGetMaxSigId(DetectEngineCtx *);
+const char *DetectEngineMpmCachingGetPath(void);
 /* faster as a macro than a inline function on my box -- VJ */
 #define DetectEngineGetMaxSigId(de_ctx) ((de_ctx)->signum)
 void DetectEngineResetMaxSigId(DetectEngineCtx *);
diff --git a/src/util-mpm-hs.c b/src/util-mpm-hs.c
index 82b91b4ec0e8..02301eb7e415 100644
--- a/src/util-mpm-hs.c
+++ b/src/util-mpm-hs.c
@@ -33,6 +33,7 @@
 #include "detect-engine-build.h"
 
 #include "conf.h"
+#include "util-conf.h"
 #include "util-debug.h"
 #include "util-unittest.h"
 #include "util-unittest-helper.h"
@@ -42,6 +43,7 @@
 #include "util-hash.h"
 #include "util-hash-lookup3.h"
 #include "util-hyperscan.h"
+#include "util-path.h"
 
 #ifdef BUILD_HYPERSCAN
 
@@ -81,6 +83,53 @@ static SCMutex g_scratch_proto_mutex = SCMUTEX_INITIALIZER;
 static HashTable *g_db_table = NULL;
 static SCMutex g_db_table_mutex = SCMUTEX_INITIALIZER;
 
+/**
+ * Translates Hyperscan error codes to human-readable messages.
+ *
+ * \param error_code
+ *      The error code returned by a Hyperscan function.
+ * \return
+ *      A string describing the error.
+ */
+static const char *HSErrorToStr(hs_error_t error_code)
+{
+    switch (error_code) {
+        case HS_SUCCESS:
+            return "HS_SUCCESS: The engine completed normally";
+        case HS_INVALID:
+            return "HS_INVALID: A parameter passed to this function was invalid";
+        case HS_NOMEM:
+            return "HS_NOMEM: A memory allocation failed";
+        case HS_SCAN_TERMINATED:
+            return "HS_SCAN_TERMINATED: The engine was terminated by callback";
+        case HS_COMPILER_ERROR:
+            return "HS_COMPILER_ERROR: The pattern compiler failed";
+        case HS_DB_VERSION_ERROR:
+            return "HS_DB_VERSION_ERROR: The given database was built for a different version of "
+                   "Hyperscan";
+        case HS_DB_PLATFORM_ERROR:
+            return "HS_DB_PLATFORM_ERROR: The given database was built for a different platform "
+                   "(i.e., CPU type)";
+        case HS_DB_MODE_ERROR:
+            return "HS_DB_MODE_ERROR: The given database was built for a different mode of "
+                   "operation";
+        case HS_BAD_ALIGN:
+            return "HS_BAD_ALIGN: A parameter passed to this function was not correctly aligned";
+        case HS_BAD_ALLOC:
+            return "HS_BAD_ALLOC: The memory allocator did not return correctly aligned memory";
+        case HS_SCRATCH_IN_USE:
+            return "HS_SCRATCH_IN_USE: The scratch region was already in use";
+        case HS_ARCH_ERROR:
+            return "HS_ARCH_ERROR: Unsupported CPU architecture";
+        case HS_INSUFFICIENT_SPACE:
+            return "HS_INSUFFICIENT_SPACE: Provided buffer was too small";
+        case HS_UNKNOWN_ERROR:
+            return "HS_UNKNOWN_ERROR: Unexpected internal error";
+        default:
+            return "Unknown error code";
+    }
+}
+
 /**
  * \internal
  * \brief Wraps SCMalloc (which is a macro) so that it can be passed to
@@ -570,6 +619,170 @@ static PatternDatabase *PatternDatabaseAlloc(uint32_t pattern_cnt)
     return pd;
 }
 
+static const char *HSCacheConstructFPath(uint64_t hs_db_hash)
+{
+    static char hash_file_path[PATH_MAX];
+
+    char hash_file_path_suffix[] = "_v1.hs";
+    char filename[PATH_MAX];
+    int r = snprintf(filename, sizeof(filename), "%020lu%s", hs_db_hash, hash_file_path_suffix);
+    if (r != (int)(20 + strlen(hash_file_path_suffix)))
+        return NULL;
+
+    r = PathMerge(
+            hash_file_path, sizeof(hash_file_path), DetectEngineMpmCachingGetPath(), filename);
+    if (r)
+        return NULL;
+
+    return hash_file_path;
+}
+
+static char *HSReadStream(const char *file_path, size_t *buffer_sz)
+{
+    FILE *file = fopen(file_path, "rb");
+    if (!file) {
+        SCLogConfig("Failed to open file %s: %s", file_path, strerror(errno));
+        return NULL;
+    }
+
+    // Seek to the end of the file to determine its size
+    fseek(file, 0, SEEK_END);
+    long file_sz = ftell(file);
+    if (file_sz < 0) {
+        SCLogConfig("Failed to determine file size of %s: %s", file_path, strerror(errno));
+        fclose(file);
+        return NULL;
+    }
+
+    // Allocate a buffer to hold the entire file
+    char *buffer = (char *)SCCalloc(file_sz, sizeof(char));
+    if (!buffer) {
+        SCLogWarning("Failed to allocate memory");
+        fclose(file);
+        return NULL;
+    }
+
+    // Rewind file pointer and read the file into the buffer
+    rewind(file);
+    size_t bytes_read = fread(buffer, 1, file_sz, file);
+    if (bytes_read != (size_t)file_sz) {
+        SCLogConfig("Failed to read the entire file %s: %s", file_path, strerror(errno));
+        SCFree(buffer);
+        fclose(file);
+        return NULL;
+    }
+
+    *buffer_sz = file_sz;
+    fclose(file);
+    return buffer;
+}
+
+/**
+ * Function to hash the searched pattern, only things relevant to Hyperscan
+ * compilation are hashed.
+ */
+static void SCHSCachePatternHash(const SCHSPattern *p, uint32_t *h1, uint32_t *h2)
+{
+    BUG_ON(p->original_pat == NULL);
+    hashlittle2_safe(&p->len, sizeof(p->len), h1, h2);
+    hashlittle2_safe(&p->flags, sizeof(p->flags), h1, h2);
+    hashlittle2_safe(p->original_pat, p->len, h1, h2);
+    hashlittle2_safe(&p->offset, sizeof(p->offset), h1, h2);
+    hashlittle2_safe(&p->depth, sizeof(p->depth), h1, h2);
+}
+
+static int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash)
+{
+    if (SCCreateDirectoryTree(DetectEngineMpmCachingGetPath(), true) != 0)
+        return -1;
+    const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
+    if (hash_file_static == NULL)
+        return -1;
+
+    SCLogDebug("Loading the cached HS DB from %s", hash_file_static);
+    if (!SCPathExists(hash_file_static))
+        return -1;
+
+    FILE *db_cache = fopen(hash_file_static, "r");
+    char *buffer = NULL;
+    int ret = 0;
+    if (db_cache) {
+        size_t buffer_size;
+        buffer = HSReadStream(hash_file_static, &buffer_size);
+        if (!buffer) {
+            SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static);
+            ret = -1;
+            goto freeup;
+        }
+
+        hs_error_t error = hs_deserialize_database(buffer, buffer_size, hs_db);
+        if (error != HS_SUCCESS) {
+            SCLogWarning("Failed to deserialize Hyperscan database of %s: %s", hash_file_static,
+                    HSErrorToStr(error));
+            ret = -1;
+            goto freeup;
+        }
+
+        ret = 0;
+        goto freeup;
+    }
+
+freeup:
+    if (db_cache)
+        fclose(db_cache);
+    if (buffer)
+        SCFree(buffer);
+    return ret;
+}
+
+static void HSSaveCache(hs_database_t *hs_db, uint64_t hs_db_hash)
+{
+    static bool notified = false;
+    char *db_stream = NULL;
+    size_t db_size;
+
+    hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size);
+    if (err != HS_SUCCESS) {
+        SCLogWarning("Failed to serialize Hyperscan database: %s", HSErrorToStr(err));
+        return;
+    }
+
+    const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
+    SCLogDebug("Caching the compiled HS at %s", hash_file_static);
+    if (SCPathExists(hash_file_static)) {
+        // potentially signs that it might not work as expected as we got into
+        // hash collision. If this happens with older and not used caches it is
+        // fine
+        // It is problematic when one ruleset yields two colliding MPM groups.
+        SCLogWarning("Overwriting cache file %s. If the problem persists consider switching off "
+                     "the caching",
+                hash_file_static);
+    }
+
+    FILE *db_cache_out = fopen(hash_file_static, "w");
+    if (!db_cache_out) {
+        if (!notified) {
+            SCLogWarning("Failed to create Hyperscan cache file, make sure the folder exist and is "
+                         "writable or adjust sgh-mpm-caching-path setting (%s)",
+                    hash_file_static);
+            notified = true;
+        }
+        goto cleanup;
+    }
+    int ret = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out);
+    if (ret > 0 && (size_t)ret != db_size) {
+        SCLogWarning("Failed to write to file: %s", hash_file_static);
+    }
+    ret = fclose(db_cache_out);
+    if (ret != 0) {
+        SCLogWarning("Failed to close file: %s", hash_file_static);
+    }
+
+cleanup:
+    if (db_stream)
+        SCFree(db_stream);
+}
+
 /**
  * \brief Process the patterns added to the mpm, and create the internal tables.
  *
@@ -633,6 +846,7 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx)
      * before, and reuse the Hyperscan database if so. */
     PatternDatabase *pd_cached = HashTableLookup(g_db_table, pd, 1);
 
+    uint64_t cached_hash = 0;
     if (pd_cached != NULL) {
         SCLogDebug("Reusing cached database %p with %" PRIu32
                    " patterns (ref_cnt=%" PRIu32 ")",
@@ -644,6 +858,44 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx)
         PatternDatabaseFree(pd);
         SCHSFreeCompileData(cd);
         return 0;
+    } else if (DetectEngineMpmCachingEnabled()) {
+        uint32_t *hash = (uint32_t *)(&cached_hash);
+        hashword2(&pd->pattern_cnt, 1, &hash[0], &hash[1]);
+        for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
+            SCHSCachePatternHash(pd->parray[i], &hash[0], &hash[1]);
+        }
+        if (HSLoadCache(&pd->hs_db, cached_hash) == 0) {
+            pd->ref_cnt = 1;
+            ctx->pattern_db = pd;
+
+            SCMutexLock(&g_scratch_proto_mutex);
+            err = hs_alloc_scratch(pd->hs_db, &g_scratch_proto);
+            SCMutexUnlock(&g_scratch_proto_mutex);
+            if (err != HS_SUCCESS) {
+                SCLogError("failed to allocate scratch: %s", HSErrorToStr(err));
+                SCMutexUnlock(&g_db_table_mutex);
+                goto error;
+            }
+
+            err = hs_database_size(pd->hs_db, &ctx->hs_db_size);
+            if (err != HS_SUCCESS) {
+                SCLogError("failed to query database size: %s", HSErrorToStr(err));
+                SCMutexUnlock(&g_db_table_mutex);
+                goto error;
+            }
+
+            mpm_ctx->memory_cnt++;
+            mpm_ctx->memory_size += ctx->hs_db_size;
+
+            int r = HashTableAdd(g_db_table, pd, 1);
+            SCMutexUnlock(&g_db_table_mutex);
+            if (r < 0)
+                goto error;
+
+            SCMutexUnlock(&g_db_table_mutex);
+            SCHSFreeCompileData(cd);
+            return 0;
+        }
     }
 
     BUG_ON(ctx->pattern_db != NULL); /* already built? */
@@ -725,6 +977,10 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx)
     if (r < 0)
         goto error;
 
+    if (DetectEngineMpmCachingEnabled()) {
+        HSSaveCache(pd->hs_db, cached_hash);
+    }
+
     SCHSFreeCompileData(cd);
     return 0;
 
diff --git a/suricata.yaml.in b/suricata.yaml.in
index 3e73623c4b21..6db5306aaab2 100644
--- a/suricata.yaml.in
+++ b/suricata.yaml.in
@@ -1685,6 +1685,10 @@ detect:
     toclient-groups: 3
     toserver-groups: 25
   sgh-mpm-context: auto
+  # Cache MPM contexts to the disk for avoid rule compilation at the startup.
+  # Cache files are created in the default logging directory.
+  sgh-mpm-caching: yes
+  sgh-mpm-caching-path: @e_hscachedir@
   inspection-recursion-limit: 3000
   # maximum number of times a tx will get logged for a stream-only rule match
   # stream-tx-log-limit: 4

From 653466cf69b12cf6432b1446920dceb4484682ab Mon Sep 17 00:00:00 2001
From: Lukas Sismis <lsismis@oisf.net>
Date: Wed, 24 Jul 2024 22:45:20 +0200
Subject: [PATCH 3/4] util-path: remove dead code

---
 src/util-path.c | 36 ------------------------------------
 src/util-path.h |  1 -
 2 files changed, 37 deletions(-)

diff --git a/src/util-path.c b/src/util-path.c
index 34c4cc75ca79..356c4a7727b9 100644
--- a/src/util-path.c
+++ b/src/util-path.c
@@ -117,42 +117,6 @@ char *PathMergeAlloc(const char *const dir, const char *const fname)
     return ret;
 }
 
-/**
- * \brief Wrapper to join a directory and filename and resolve using realpath
- *   _fullpath is used for WIN32
- *
- * \param out_buf output buffer.  Up to PATH_MAX will be written.  Unchanged on exit failure.
- * \param buf_size length of output buffer, must be PATH_MAX
- * \param dir the directory
- * \param fname the filename
- *
- * \retval 0 on success
- * \retval -1 on failure
- */
-int PathJoin(char *out_buf, size_t buf_size, const char *const dir, const char *const fname)
-{
-    SCEnter();
-    if (buf_size != PATH_MAX) {
-        return -1;
-    }
-    if (PathMerge(out_buf, buf_size, dir, fname) != 0) {
-        SCLogError("Could not join filename to path");
-        return -1;
-    }
-    char *tmp_buf = SCRealPath(out_buf, NULL);
-    if (tmp_buf == NULL) {
-        SCLogError("Error resolving path: %s", strerror(errno));
-        return -1;
-    }
-    memset(out_buf, 0, buf_size);
-    size_t ret = strlcpy(out_buf, tmp_buf, buf_size);
-    free(tmp_buf);
-    if (ret >= buf_size) {
-        return -1;
-    }
-    return 0;
-}
-
 /**
  * \brief Wrapper around SCMkDir with default mode arguments.
  */
diff --git a/src/util-path.h b/src/util-path.h
index fee58eabd3c4..b2b2624908dd 100644
--- a/src/util-path.h
+++ b/src/util-path.h
@@ -51,7 +51,6 @@ int PathIsAbsolute(const char *);
 int PathIsRelative(const char *);
 int PathMerge(char *out_buf, size_t buf_size, const char *const dir, const char *const fname);
 char *PathMergeAlloc(const char *const dir, const char *const fname);
-int PathJoin(char *out_buf, size_t buf_len, const char *const dir, const char *const fname);
 int SCDefaultMkDir(const char *path);
 int SCCreateDirectoryTree(const char *path, const bool final);
 bool SCPathExists(const char *path);

From 02921a5fd8b375834edd3684ebc48b4ea8b7a046 Mon Sep 17 00:00:00 2001
From: Lukas Sismis <lsismis@oisf.net>
Date: Wed, 24 Jul 2024 22:46:15 +0200
Subject: [PATCH 4/4] detect-engine: remove commented out code

---
 src/detect-engine.c | 8 --------
 src/detect-engine.h | 1 -
 2 files changed, 9 deletions(-)

diff --git a/src/detect-engine.c b/src/detect-engine.c
index 6502e9936a6f..8412ff9f7eb6 100644
--- a/src/detect-engine.c
+++ b/src/detect-engine.c
@@ -3000,14 +3000,6 @@ bool DetectEngineMpmCachingEnabled(void)
     return (bool)sgh_mpm_caching;
 }
 
-/*
- * getting & (re)setting the internal sig i
- */
-
-//inline uint32_t DetectEngineGetMaxSigId(DetectEngineCtx *de_ctx)
-//{
-//    return de_ctx->signum;
-//}
 const char *DetectEngineMpmCachingGetPath(void)
 {
     const char *strval = NULL;
diff --git a/src/detect-engine.h b/src/detect-engine.h
index b71b992b12f2..2d811fcedfb9 100644
--- a/src/detect-engine.h
+++ b/src/detect-engine.h
@@ -101,7 +101,6 @@ void *DetectThreadCtxGetGlobalKeywordThreadCtx(DetectEngineThreadCtx *det_ctx, i
 TmEcode DetectEngineThreadCtxInit(ThreadVars *, void *, void **);
 TmEcode DetectEngineThreadCtxDeinit(ThreadVars *, void *);
 bool DetectEngineMpmCachingEnabled(void);
-//inline uint32_t DetectEngineGetMaxSigId(DetectEngineCtx *);
 const char *DetectEngineMpmCachingGetPath(void);
 /* faster as a macro than a inline function on my box -- VJ */
 #define DetectEngineGetMaxSigId(de_ctx) ((de_ctx)->signum)