From f742254e310e59505736df07589ba0dcf36bdd24 Mon Sep 17 00:00:00 2001 From: Thibaut Artis Date: Wed, 11 Sep 2024 18:02:14 +0200 Subject: [PATCH 1/2] jail: Try disabling Transparent Hugepage on Linux Disabling Transparent Hugepage has often been the solution to solve hard-to-diagnose instability issues and despite improvements in this area compared to the RHEL6 era, our recommandation is still to avoid THP to this day. In addition to refreshing the documentation on this topic, the Linux jail will tentatively disable THP or warn when it cannot. --- bin/varnishd/mgt/mgt_jail_linux.c | 72 ++++++++++++++++++++--- doc/sphinx/installation/platformnotes.rst | 30 ++++++---- doc/sphinx/reference/varnishd.rst | 7 ++- 3 files changed, 90 insertions(+), 19 deletions(-) diff --git a/bin/varnishd/mgt/mgt_jail_linux.c b/bin/varnishd/mgt/mgt_jail_linux.c index 93e5c2a6d16..05722d64ca6 100644 --- a/bin/varnishd/mgt/mgt_jail_linux.c +++ b/bin/varnishd/mgt/mgt_jail_linux.c @@ -48,11 +48,62 @@ #include "mgt/mgt.h" #include "common/heritage.h" +static int +vjl_set_thp(const char *arg) +{ + int val; + + if (!strcmp(arg, "ignore")) + return (0); + if (!strcmp(arg, "enable")) + val = 0; + else if (!strcmp(arg, "disable")) + val = 1; + else { + ARGV_ERR( + "linux jail: unknown value '%s' for argument transparent_hugepage.\n", + arg); + } + if (prctl(PR_SET_THP_DISABLE, val, 0, 0, 0) != 0) { + MGT_Complain(C_ERR, + "Could not %s Transparent Hugepage: %s (%d)", + arg, VAS_errtxt(errno), errno); + } + return (0); +} + static int vjl_init(char **args) { + char **unix_args; + int ret = 0; + size_t i; + + if (args == NULL) { + /* Autoconfig */ + if (vjl_set_thp("disable") != 0) + return (1); + return (jail_tech_unix.init(NULL)); + } - return jail_tech_unix.init(args); + for (i = 0; args[i] != NULL; i++); + unix_args = calloc(i + 1, sizeof *unix_args); + AN(unix_args); + + for (i = 0; *args != NULL && ret == 0; args++) { + if (!strncmp(*args, "transparent_hugepage=", + sizeof("transparent_hugepage=") - 1)) { + ret = vjl_set_thp((*args) + (sizeof("transparent_hugepage=") - 1)); + } else { + unix_args[i] = *args; + i++; + } + } + + if (ret == 0) + ret = jail_tech_unix.init(unix_args); + free(unix_args); + return (ret); } static void @@ -94,17 +145,24 @@ vjl_make_workdir(const char *dname, const char *what, struct vsb *vsb) vjl_master(JAIL_MASTER_FILE); if (statfs(dname, &info) != 0) { - if (vsb) - VSB_printf(vsb, "Could not stat working directory '%s': %s (%d)\n", dname, VAS_errtxt(errno), errno); - else - MGT_Complain(C_ERR, "Could not stat working directory '%s': %s (%d)", dname, VAS_errtxt(errno), errno); + if (vsb) { + VSB_printf(vsb, + "Could not stat working directory '%s': %s (%d)\n", + dname, VAS_errtxt(errno), errno); + } else { + MGT_Complain(C_ERR, + "Could not stat working directory '%s': %s (%d)", + dname, VAS_errtxt(errno), errno); + } return (1); } if (info.f_type != TMPFS_MAGIC) { if (vsb != NULL) - VSB_printf(vsb, "Working directory not mounted on tmpfs partition\n"); + VSB_printf(vsb, + "Working directory not mounted on tmpfs partition\n"); else - MGT_Complain(C_INFO, "Working directory not mounted on tmpfs partition"); + MGT_Complain(C_INFO, + "Working directory not mounted on tmpfs partition"); } vjl_master(JAIL_MASTER_LOW); return (0); diff --git a/doc/sphinx/installation/platformnotes.rst b/doc/sphinx/installation/platformnotes.rst index 494e4eb9d6d..37b7fe70ef9 100644 --- a/doc/sphinx/installation/platformnotes.rst +++ b/doc/sphinx/installation/platformnotes.rst @@ -11,21 +11,29 @@ On some platforms it is necessary to adjust the operating system before running Varnish on it. The systems and steps known to us are described in this section. -Transparent hugepages on Redhat Enterprise Linux 6 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Transparent Hugepage on Linux +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -On RHEL6 Transparent Hugepage kernel support is enabled by default. -This is known to cause sporadic crashes of Varnish. +On certain Linux distributions Transparent Hugepage kernel support is enabled +by default. This is known to cause instabilities of Varnish. -It is recommended to disable transparent hugepages on affected -systems. This can be done with -``echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled`` -(runtime) or by adding "transparent_hugepage=never" to the kernel boot -line in the "/etc/grub.conf" file (persistent). +It is recommended to disable Transparent Hugepage on affected systems. +If Varnish is the only significant service running on this system, this can be +done during runtime with:: -On Debian/Ubuntu systems running 3.2 kernels the default value is "madvise" and -does not need to be changed. + echo never > /sys/kernel/mm/transparent_hugepage/enabled +Alternatively, this can be persisted in your bootloader configuration by adding +``transparent_hugepage=never`` to the kernel command line. + +On other systems the default value is ``madvise`` and does not need to be +changed. Either way, the Linux :ref:`ref-varnishd-opt_j` will try to disable +Transparent Hugepage in the ``varnishd`` process. + +The general recommendation is to mount the working directory in a ``tmpfs`` +partition, usually ``/var/lib/varnish``. By default tmpfs should be mounted with +Transparent Hugepage disabled. Consider mounting the working directory with the +``huge=never`` mount option if that is not the case with your OS vendor. OpenVZ ~~~~~~ diff --git a/doc/sphinx/reference/varnishd.rst b/doc/sphinx/reference/varnishd.rst index a3c445d142d..dc37d4f09eb 100644 --- a/doc/sphinx/reference/varnishd.rst +++ b/doc/sphinx/reference/varnishd.rst @@ -454,13 +454,18 @@ specific options. Available jails are: -j solaris,worker=basic --j +-j ...]> Default on Linux platforms, it extends the UNIX jail with Linux-specific mechanisms: - It warns when *workdir* is not on a ``tmpfs``. - It tries to keep the process dumpable after dropping privileges. + - It tries to disable Transparent Hugepage in ``varnishd`` by default. + +The optional transparent_hugepage argument takes one action among enable, +disable (default) and ignore. Failure to configure Transparent Hugepage is +not a fatal error. -j From f69c16b40820d0a05e080c9b24b2deb8a64a441d Mon Sep 17 00:00:00 2001 From: Thibaut Artis Date: Wed, 25 Sep 2024 10:40:42 +0200 Subject: [PATCH 2/2] jail: Match jail arguments with a macro This commit provides a more elegant way to check jail arguments. It was motivated by this comment https://github.com/varnishcache/varnish-cache/pull/4174#discussion_r1773681054 --- bin/varnishd/mgt/mgt.h | 2 ++ bin/varnishd/mgt/mgt_jail_linux.c | 3 +-- bin/varnishd/mgt/mgt_jail_solaris.c | 2 +- bin/varnishd/mgt/mgt_jail_unix.c | 6 +++--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/bin/varnishd/mgt/mgt.h b/bin/varnishd/mgt/mgt.h index 4c40e7ccf10..4afa7f07110 100644 --- a/bin/varnishd/mgt/mgt.h +++ b/bin/varnishd/mgt/mgt.h @@ -165,6 +165,8 @@ extern const struct jail_tech jail_tech_unix; extern const struct jail_tech jail_tech_solaris; extern const struct jail_tech jail_tech_linux; +#define MATCH_JAIL_ARG(arg, option) (!strncmp(arg, option, sizeof(option) - 1)) + /* mgt_main.c */ extern struct vsb *vident; extern struct VSC_mgt *VSC_C_mgt; diff --git a/bin/varnishd/mgt/mgt_jail_linux.c b/bin/varnishd/mgt/mgt_jail_linux.c index 05722d64ca6..b2d68e8fc21 100644 --- a/bin/varnishd/mgt/mgt_jail_linux.c +++ b/bin/varnishd/mgt/mgt_jail_linux.c @@ -91,8 +91,7 @@ vjl_init(char **args) AN(unix_args); for (i = 0; *args != NULL && ret == 0; args++) { - if (!strncmp(*args, "transparent_hugepage=", - sizeof("transparent_hugepage=") - 1)) { + if (MATCH_JAIL_ARG(*args, "transparent_hugepage=")) { ret = vjl_set_thp((*args) + (sizeof("transparent_hugepage=") - 1)); } else { unix_args[i] = *args; diff --git a/bin/varnishd/mgt/mgt_jail_solaris.c b/bin/varnishd/mgt/mgt_jail_solaris.c index d35d937265e..26d2d7cd97f 100644 --- a/bin/varnishd/mgt/mgt_jail_solaris.c +++ b/bin/varnishd/mgt/mgt_jail_solaris.c @@ -328,7 +328,7 @@ vjs_init(char **args) if (args != NULL && *args != NULL) { for (;*args != NULL; args++) { - if (!strncmp(*args, "worker=", 7)) { + if (MATCH_JAIL_ARG(*args, "worker=")) { user = priv_str_to_set((*args) + 7, ",", &e); if (user == NULL) ARGV_ERR( diff --git a/bin/varnishd/mgt/mgt_jail_unix.c b/bin/varnishd/mgt/mgt_jail_unix.c index 8fb6dfc9a45..d768bf5e4ce 100644 --- a/bin/varnishd/mgt/mgt_jail_unix.c +++ b/bin/varnishd/mgt/mgt_jail_unix.c @@ -132,21 +132,21 @@ vju_init(char **args) ARGV_ERR("Unix Jail: Must be root.\n"); for (;*args != NULL; args++) { - if (!strncmp(*args, "user=", 5)) { + if (MATCH_JAIL_ARG(*args, "user=")) { if (vju_getuid((*args) + 5)) ARGV_ERR( "Unix jail: %s user not found.\n", (*args) + 5); continue; } - if (!strncmp(*args, "workuser=", 9)) { + if (MATCH_JAIL_ARG(*args, "workuser=")) { if (vju_getwrkuid((*args) + 9)) ARGV_ERR( "Unix jail: %s user not found.\n", (*args) + 9); continue; } - if (!strncmp(*args, "ccgroup=", 8)) { + if (MATCH_JAIL_ARG(*args, "ccgroup=")) { if (vju_getccgid((*args) + 8)) ARGV_ERR( "Unix jail: %s group not found.\n",