diff --git a/Dockerfile b/Dockerfile
index 029dca39e..949112ee6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,10 +20,43 @@ RUN pip install poetry==$POETRY_VERSION && \
     ./hack/build-wheels.sh /opt/mlserver/dist && \
     poetry export --with all-runtimes \
         --without-hashes \
+        -E everything \
         --format constraints.txt \
         -o /opt/mlserver/dist/constraints.txt && \
     sed -i 's/\[.*\]//g' /opt/mlserver/dist/constraints.txt
+
+# Build native dependencies for tracepoints.
+# AlmaLinux is binary-compatible with RHEL UBI images but provides repositories
+# with additional devel packages (elfutils-libelf-devel is needed here)
+FROM almalinux/9-minimal AS libstapsdt-builder
+SHELL ["/bin/bash", "-c"]
+
+ARG LIBSTAPSDT_VERSION="0.1.1"
+
+# Install libstapsdt dev dependencies
+RUN microdnf update -y && \
+    microdnf install -y \
+        wget \
+        tar \
+        gzip \
+        gcc \
+        make \
+        findutils \
+        elfutils-libelf-devel
+
+# Fetch the libstapsdt sources, then compile and install them into a separate tree.
+# We also patch the resulting library symlink to be relative, so that the
+# installed files can be copied directly into a different container
+RUN wget "https://github.com/linux-usdt/libstapsdt/archive/refs/tags/v${LIBSTAPSDT_VERSION}.tar.gz" && \
+    tar -xzf v${LIBSTAPSDT_VERSION}.tar.gz && \
+    cd libstapsdt-${LIBSTAPSDT_VERSION} && \
+    make && \
+    make install DESTDIR=/libstapsdt-install && \
+    cd /libstapsdt-install/usr/lib && \
+    readlink libstapsdt.so | sed s+/libstapsdt-install/usr/lib/++ | xargs -I % ln -fs % libstapsdt.so
+
+
 FROM registry.access.redhat.com/ubi9/ubi-minimal
 SHELL ["/bin/bash", "-c"]
 
@@ -53,7 +86,13 @@ RUN microdnf update -y && \
         libgomp \
         mesa-libGL \
         glib2-devel \
-        shadow-utils
+        shadow-utils \
+        elfutils-libelf
+
+# Install libstapsdt
+COPY --from=libstapsdt-builder /libstapsdt-install /
+# Update symlinks & ldconfig cache
+RUN ldconfig
 
 # Install Conda, Python 3.10 and FFmpeg
 RUN microdnf install -y wget && \
@@ -107,7 +146,7 @@ RUN . $CONDA_PATH/etc/profile.d/conda.sh && \
             pip install $_wheel --constraint ./dist/constraints.txt; \
         done \
     fi && \
-    pip install $(ls "./dist/mlserver-"*.whl) --constraint ./dist/constraints.txt && \
+    pip install $(ls "./dist/mlserver-"*.whl)[everything] --constraint ./dist/constraints.txt && \
     rm -f /opt/conda/lib/python3.10/site-packages/spacy/tests/package/requirements.txt && \
     rm -rf /root/.cache/pip
diff --git a/licenses/license.txt b/licenses/license.txt
index a1b343cf5..90088caee 100644
--- a/licenses/license.txt
+++ b/licenses/license.txt
@@ -23097,6 +23097,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.
 
+stapsdt
+0.1.1
+MIT License
+UNKNOWN
+
 starlette
 0.27.0
 BSD License
diff --git a/licenses/license_info.csv b/licenses/license_info.csv
index ddd982b99..da89a07b6 100644
--- a/licenses/license_info.csv
+++ b/licenses/license_info.csv
@@ -222,6 +222,7 @@
 "spacy-lookups-data","1.0.3","MIT License"
 "sqlparse","0.4.4","BSD License"
 "srsly","2.4.6","MIT License"
+"stapsdt","0.1.1","MIT License"
 "starlette","0.27.0","BSD License"
 "starlette-exporter","0.16.0","Apache License 2.0"
 "sympy","1.12","BSD License"
diff --git a/licenses/license_info.no_versions.csv b/licenses/license_info.no_versions.csv
index f68aafffe..d7ddf7cc6 100644
--- a/licenses/license_info.no_versions.csv
+++ b/licenses/license_info.no_versions.csv
@@ -222,6 +222,7 @@
 "spacy-lookups-data","MIT License"
 "sqlparse","BSD License"
 "srsly","MIT License"
+"stapsdt","MIT License"
 "starlette","BSD License"
 "starlette-exporter","Apache License 2.0"
 "sympy","BSD License"
diff --git a/mlserver/parallel/system_tracing.py b/mlserver/parallel/system_tracing.py
new file mode 100644
index 000000000..f5f4fb060
--- /dev/null
+++ b/mlserver/parallel/system_tracing.py
@@ -0,0 +1,4 @@
+from ..sys_tracing import SystemTracingProvider
+
+sysTracingProviderWorkerName = "mlserver"
+sys_tracer: SystemTracingProvider = SystemTracingProvider(sysTracingProviderWorkerName)
diff --git a/mlserver/parallel/worker.py b/mlserver/parallel/worker.py
index 5456e139d..84a5f0f0e 100644
--- a/mlserver/parallel/worker.py
+++ b/mlserver/parallel/worker.py
@@ -11,6 +11,7 @@
 from ..registry import MultiModelRegistry
 from ..utils import install_uvloop_event_loop, schedule_with_callback
 from ..logging import configure_logger
+from ..system_tracing import configure_tracepoints
 from ..settings import Settings
 from ..metrics import configure_metrics, model_context
 from ..env import Environment
@@ -23,6 +24,7 @@
 )
 from .utils import terminate_queue, END_OF_QUEUE
 from .logging import logger
+from .system_tracing import sys_tracer
 from .errors import WorkerError
 
 IGNORED_SIGNALS = [signal.SIGINT, signal.SIGTERM, signal.SIGQUIT]
@@ -64,7 +66,11 @@ def run(self):
         with ctx:
             install_uvloop_event_loop()
+            configure_tracepoints(sys_tracer, self._settings.tracepoints_enabled)
             configure_logger(self._settings)
+            logger.info(
+                f"Worker [{self.pid}]: enabled {sys_tracer.tracepoints_count} tracepoints"
+            )
             configure_metrics(self._settings)
             self._ignore_signals()
             asyncio.run(self.coro_run())
@@ -196,3 +202,4 @@ async def stop(self):
         self._model_updates.close()
         self._requests.close()
         self._executor.shutdown()
+        sys_tracer.unload()
diff --git a/mlserver/registry.py b/mlserver/registry.py
index 9aa026f19..037178b4f 100644
--- a/mlserver/registry.py
+++ b/mlserver/registry.py
@@ -8,6 +8,7 @@
 from .errors import ModelNotFound
 from .logging import logger
 from .settings import ModelSettings
+from .system_tracing import sys_tracer
 
 ModelInitialiser = Callable[[ModelSettings], MLModel]
 ModelRegistryHook = Callable[[MLModel], Awaitable[MLModel]]
@@ -155,6 +156,7 @@ async def _load_model(self, model: MLModel):
             # appears as a not-ready (i.e. loading) model
             self._register(model)
+            sys_tracer.tp_model_load_begin(model.name, model.version)
 
             for callback in self._on_model_load:
                 # NOTE: Callbacks need to be executed sequentially to ensure that
                 # they go in the right order
@@ -164,6 +166,7 @@ async def _load_model(self, model: MLModel):
             self._register(model)
 
             model.ready = await model.load()
+            sys_tracer.tp_model_load_end(model.name, model.version)
 
             logger.info(f"Loaded model '{model.name}' succesfully.")
         except Exception:
             logger.info(
@@ -180,7 +183,11 @@ async def _reload_model(self, old_model: MLModel, new_model: MLModel):
             # Loading the model before unloading the old one - this will ensure
             # that at least one is available (sort of mimicking a rolling
             # deployment)
+            sys_tracer.tp_model_reload_begin(
+                new_model.name, new_model.version, old_model.version
+            )
             new_model.ready = await new_model.load()
+            sys_tracer.tp_model_reload_end(new_model.name, new_model.version)
             self._register(new_model)
 
             if old_model == self.default:
@@ -224,6 +231,8 @@ async def _unload_model(self, model: MLModel):
 
         model.ready = not await model.unload()
 
+        sys_tracer.tp_model_unload(model.name, model.version)
+
     def _find_model(self, version: Optional[str] = None) -> Optional[MLModel]:
         if version:
             if version not in self._versions:
diff --git a/mlserver/server.py b/mlserver/server.py
index d82c49080..c7400e963 100644
--- a/mlserver/server.py
+++ b/mlserver/server.py
@@ -18,12 +18,14 @@
 from .metrics import MetricsServer
 from .kafka import KafkaServer
 from .utils import logger
+from .system_tracing import sys_tracer, configure_tracepoints
 
 HANDLED_SIGNALS = [signal.SIGINT, signal.SIGTERM, signal.SIGQUIT]
 
 
 class MLServer:
     def __init__(self, settings: Settings):
+        configure_tracepoints(sys_tracer, settings.tracepoints_enabled)
         self._settings = settings
         self._add_signal_handlers()
@@ -54,6 +56,7 @@ def __init__(self, settings: Settings):
         )
 
         self._configure_logger()
+        logger.info(f"MLServer enabled {sys_tracer.tracepoints_count} tracepoints")
         self._create_servers()
 
     def _create_model_registry(self) -> MultiModelRegistry:
@@ -188,3 +191,5 @@ async def stop(self, sig: Optional[int] = None):
 
         if self._metrics_server:
             await self._metrics_server.stop(sig)
+
+        sys_tracer.unload()
diff --git a/mlserver/settings.py b/mlserver/settings.py
index 41fc90fa8..757a953fd 100644
--- a/mlserver/settings.py
+++ b/mlserver/settings.py
@@ -230,10 +230,17 @@ class Config:
     kafka_topic_input: str = "mlserver-input"
     kafka_topic_output: str = "mlserver-output"
 
-    # OpenTelemetry Tracing settings
+    # Tracing settings
+    # OpenTelemetry
     tracing_server: Optional[str] = None
     """Server name used to export OpenTelemetry tracing to collector service."""
 
+    # Enable/disable tracepoints for system tracing (BPF, SystemTap)
+    tracepoints_enabled: Optional[bool] = True
+    """
+    Control the export of static tracepoints for external probing at runtime.
+    """
+
     # Custom server settings
     _custom_rest_server_settings: Optional[dict] = None
     _custom_metrics_server_settings: Optional[dict] = None
diff --git a/mlserver/sys_tracing/__init__.py b/mlserver/sys_tracing/__init__.py
new file mode 100644
index 000000000..dd14463dc
--- /dev/null
+++ b/mlserver/sys_tracing/__init__.py
@@ -0,0 +1,12 @@
+"""
+System tracing: statically-defined tracepoints (SDT) for external probing tools.
+"""
+from .provider import SystemTracingProvider
+from .tracepoints import Tracepoint, ArgStatus, MAX_TRACEPOINT_ARGS
+
+__all__ = [
+    "SystemTracingProvider",
+    "Tracepoint",
+    "ArgStatus",
+    "MAX_TRACEPOINT_ARGS",
+]
diff --git a/mlserver/sys_tracing/provider.py b/mlserver/sys_tracing/provider.py
new file mode 100644
index 000000000..4499623b0
--- /dev/null
+++ b/mlserver/sys_tracing/provider.py
@@ -0,0 +1,224 @@
+"""
+A system tracing provider is the entity responsible for registering and managing
+a set of tracepoints that are then visible to external tracing tools. Such tools
+may register probes (e.g. SystemTap probes, BPF code) to snapshot performance
+counters, debug performance issues, link application activity to kernel actions
+or compute various aggregate metrics.
+
+MLServer fires the tracepoints defined by the tracing provider (one of the tp_*
+functions) whenever application-specific events take place, such as starting to
+process an inference request, loading/unloading a model, etc.:
+
+```
+sys_trace = SystemTracingProvider("mlserver")
+sys_trace.create_native_sdt_tracepoints(settings.tracepoints_enabled)
+...
+# A new model gets loaded
+sys_trace.tp_model_load_begin(...)
+model.ready = await model.load()
+sys_trace.tp_model_load_end(...)
+```
+
+Sometimes, the arguments that we want to pass to a tracepoint are obtained by
+some processing, which might be expensive. We do not want to pay this cost when
+no external probe is attached to that tracepoint. In this case, we can first
+call the `is_active_for` member function, which only returns true if the passed
+tracepoint has an external probe attached:
+
+```
+if sys_trace.is_active_for(Tracepoint.inference_begin):
+    # compute arguments
+    pipeline = parse_headers(...)
+
+    # fire tracepoint
+    sys_trace.tp_inference_begin(..., pipeline, ...)
+```
+
+Each tp_* function calls into a native shared library that is generated on the
+fly and dynamically loaded inside the process when calling
+`sys_trace.create_native_sdt_tracepoints(...)`. In the shared library, a
+tracepoint is just a hook function (a series of nop instructions followed by a
+ret). The code of this hook can be modified at runtime to jump into code
+provided from outside the process (e.g. SystemTap or BPF probes running in
+kernel context).
+
+When no probe is attached, a tracepoint adds almost no overhead (the cost of a
+function call and a branch instruction). When a probe is attached, the process
+will incur a mode switch (~5-10 us) each time the probe fires, plus the
+overhead of the probing code.
+
+At the moment, tracing is only available on x86-64 architectures.
+"""
+from typing import Dict, Union
+from importlib import import_module
+
+from ..logging import logger
+from .tracepoints import Tracepoint, ArgStatus, MAX_TRACEPOINT_ARGS
+from .stapsdt_stub import Probe as NopTracepoint
+
+
+class SystemTracingProvider:
+    def __init__(self, name: str):
+        # Tracing is an optional facility, requiring additional runtime
+        # dependencies. Because of this, we initialize this provider with stub
+        # tracepoints which do nothing.
+        #
+        # When tracing is enabled via MLServer settings, the actual tracepoint
+        # insertion will happen during the `create_native_sdt_tracepoints()`
+        # call.
+        stapsdt = import_module(".stapsdt_stub", "mlserver.sys_tracing")
+        self._name = name
+
+        self._provider = stapsdt.Provider(self._name)
+        self._num_native_tracepoints: int = 0
+        self._tracepoint_definition: Dict[
+            Tracepoint, Union[stapsdt.Probe, NopTracepoint]
+        ] = {}
+
+        for tp in Tracepoint:
+            self._tracepoint_definition[tp] = NopTracepoint()
+
+    def create_native_sdt_tracepoints(self, enable_tracepoints: bool):
+        """
+        Makes the configured tracepoints visible to external tracing programs.
+
+        In the underlying implementation, this:
+          - generates a native shared library with the tracepoints and a
+            .notes ELF section with metadata used for attaching probes
+          - loads the shared library in the current process
+        """
+        if not enable_tracepoints:
+            return
+
+        try:
+            # On import, stapsdt will attempt to dynamically load
+            # libstapsdt.so, which is responsible for dynamically creating a
+            # native ELF library containing the tracepoint functions to which
+            # external probes can be attached.
+            import stapsdt
+        except Exception:
+            logger.warning(
+                "Could not enable tracing. Ensure python-stapsdt and "
+                "libstapsdt.so are installed"
+            )
+            return
+
+        self._provider = stapsdt.Provider(self._name)
+
+        # Register tracepoints with the native stapsdt provider
+        for tp in Tracepoint:
+            arg_types, status = tp.get_arg_types()
+            if arg_types is not None:
+                self._num_native_tracepoints += 1
+                self._tracepoint_definition[tp] = self._provider.add_probe(
+                    tp.name, *arg_types
+                )
+                if status == ArgStatus.TooManyArguments:
+                    logger.warning(
+                        f"The prototype for tracepoint {tp.name} has too many "
+                        f"arguments (max: {MAX_TRACEPOINT_ARGS}): only the "
+                        f"first {MAX_TRACEPOINT_ARGS} will be considered"
+                    )
+            elif status == ArgStatus.NoPrototypeDefined:
+                logger.warning(
+                    f"Could not register tracepoint {tp.name}, as it has "
+                    "no defined prototype."
+                )
+
+        # Generate and load the shared object library containing the native
+        # tracepoints; remove tracepoints if loading fails
+        load_error = self._provider.load()
+        if load_error:
+            self._num_native_tracepoints = 0
+            for tp in Tracepoint:
+                self._tracepoint_definition[tp] = NopTracepoint()
+
+    @property
+    def tracepoints_count(self) -> int:
+        return self._num_native_tracepoints
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def is_active_for(self, tracepoint: Tracepoint) -> bool:
+        """
+        Returns true only when there are active external probes attached to
+        the given tracepoint.
+
+        Importantly, this can be used before calling a tracepoint, when it is
+        expensive to compute some of the arguments passed to it and we want to
+        avoid incurring that cost all the time.
+
+        The overall pattern for calling a tracepoint in this case becomes:
+
+        ```
+        if sys_trace.is_active_for(Tracepoint.inference_begin):
+            # compute arguments
+            pipeline = headers.parse(...)
+
+            # fire probe
+            sys_trace.tp_inference_begin(..., pipeline, ...)
+        ```
+
+        In the underlying implementation, one of the nop instructions of the
+        tracepoint hook is replaced by a trap instruction whenever a probe is
+        attached. For non-stub tracepoints, the current function checks for
+        the existence of the trap instruction. Stub tracepoints will always
+        return false.
+        """
+        return self._tracepoint_definition[tracepoint].is_enabled()
+
+    # Explicit function definitions for each tracepoint
+    #
+    # These should be preferred to the generic __call__ below, as they
+    # constrain the number of arguments and explicitly define the tracing
+    # interface for external consumers
+    def tp_model_load_begin(self, model_name, model_version) -> bool:
+        return self._tracepoint_definition[Tracepoint.model_load_begin].fire(
+            model_name, model_version
+        )
+
+    def tp_model_load_end(self, model_name, model_version) -> bool:
+        return self._tracepoint_definition[Tracepoint.model_load_end].fire(
+            model_name, model_version
+        )
+
+    def tp_model_reload_begin(
+        self, model_name, model_version, old_model_version
+    ) -> bool:
+        return self._tracepoint_definition[Tracepoint.model_reload_begin].fire(
+            model_name, model_version, old_model_version
+        )
+
+    def tp_model_reload_end(self, model_name, model_version) -> bool:
+        return self._tracepoint_definition[Tracepoint.model_reload_end].fire(
+            model_name, model_version
+        )
+
+    def tp_model_unload(self, model_name, model_version) -> bool:
+        return self._tracepoint_definition[Tracepoint.model_unload].fire(
+            model_name, model_version
+        )
+
+    def tp_inference_enqueue_req(self, model_name, queue_len) -> bool:
+        return self._tracepoint_definition[Tracepoint.inference_enqueue_req].fire(
+            model_name, queue_len
+        )
+
+    def tp_inference_begin(self, model_name, pipeline, tags, worker_pid) -> bool:
+        return self._tracepoint_definition[Tracepoint.inference_begin].fire(
+            model_name, pipeline, tags, worker_pid
+        )
+
+    def tp_inference_end(self, model_name, pipeline, tags, worker_pid) -> bool:
+        return self._tracepoint_definition[Tracepoint.inference_end].fire(
+            model_name, pipeline, tags, worker_pid
+        )
+
+    # Generic tracepoint event, without constraining the number of arguments.
+    # Mostly here for use during the creation of new tracepoints, for which
+    # specialised tp_* functions do not exist.
+    def __call__(self, tracepoint: Tracepoint, *args) -> bool:
+        return self._tracepoint_definition[tracepoint].fire(*args)
+
+    def unload(self) -> bool:
+        for tp in Tracepoint:
+            self._tracepoint_definition[tp] = NopTracepoint()
+        return self._provider.unload()
diff --git a/mlserver/sys_tracing/stapsdt_stub.py b/mlserver/sys_tracing/stapsdt_stub.py
new file mode 100644
index 000000000..a53cdf2e9
--- /dev/null
+++ b/mlserver/sys_tracing/stapsdt_stub.py
@@ -0,0 +1,31 @@
+class Provider(object):
+    def __init__(self, name):
+        self._name = name
+        self._provider = None
+        self._probes = []
+
+    def __str__(self):
+        return self._name
+
+    def add_probe(self, name, *args):
+        probe = Probe(self, name, *args)
+        self._probes.append(probe)
+        return probe
+
+    def load(self) -> bool:
+        return False
+
+    def unload(self):
+        return True
+
+
+class Probe:
+    def __init__(self, *args):
+        self._args = args
+
+    def fire(self, *args):
+        return False
+
+    def is_enabled(self) -> bool:
+        return False
diff --git a/mlserver/sys_tracing/tracepoints.py b/mlserver/sys_tracing/tracepoints.py
new file mode 100644
index 000000000..37339c36d
--- /dev/null
+++ b/mlserver/sys_tracing/tracepoints.py
@@ -0,0 +1,186 @@
+"""
+A tracepoint is a statically defined marker placed in code to identify
+application-level events.
+
+From the perspective of the Python application, each tracepoint acts like a
+normal function call. When a tracepoint is initialized by the tracing provider,
+the prototype of this function call needs to be specified.
+
+Here, we define the list of all possible tracepoints exposed by MLServer (the
+`Tracepoint` enum) together with the arguments that can be passed to them
+(returned by `Tracepoint.get_arg_types(self)`) when each type of application
+event occurs.
+
+The list of arguments is statically defined for each tracepoint in
+`ArgTypeMap._prototypes`.
+
+The export of tracepoints to external tracing users can be toggled via the
+`tracepoints_enabled` option in the MLServer settings.
+
+See `sys_tracing.provider` for more details about the actual tracepoint
+implementation.
+"""
+from typing import Optional, Tuple, List
+from enum import Enum, IntEnum, auto
+
+# The underlying SDT tracepoint implementation currently only works on x86-64
+# architectures. For simplicity, it supports a maximum of 6 tracepoint
+# arguments, as the x86-64 calling convention passes the first 6 function
+# arguments via registers.
+MAX_TRACEPOINT_ARGS = 6
+
+
+class ArgTypes(IntEnum):
+    """
+    Encoding of Tracepoint argument types, as required by libstapsdt
+    """
+
+    # This matches the ArgTypes enum in stapsdt, so it should not be changed.
+    # It is re-defined here so as to not depend on stapsdt when tracing is
+    # disabled. The values for each argument type are stable and not expected
+    # to change.
+    noarg = 0
+    uint8 = 1
+    int8 = -1
+    uint16 = 2
+    int16 = -2
+    uint32 = 4
+    int32 = -4
+    uint64 = 8
+    int64 = -8
+
+
+class RequestQueueId(IntEnum):
+    BatchQueue = 1
+    WorkerQueue = 2
+
+
+class ArgTypeMap(IntEnum):
+    """
+    Members of this enum give meaningful names to argument types used by
+    MLServer tracepoints
+    """
+
+    model_name_t = ArgTypes.uint64
+    model_version_t = ArgTypes.uint64
+    old_model_version_t = ArgTypes.uint64
+    pipeline_t = ArgTypes.uint64
+    tags_t = ArgTypes.uint64
+    queue_len_t = ArgTypes.uint32
+    worker_pid_t = ArgTypes.int32
+
+
+class ArgStatus(Enum):
+    Ok = auto()
+    TooManyArguments = auto()
+    NoPrototypeDefined = auto()
+
+
+class Tracepoint(Enum):
+    """
+    The available MLServer tracepoints.
+
+    The tracing provider in `sys_tracing.provider` has a number of member
+    functions (`tp_*`) for firing the tracepoints declared here when the
+    corresponding application-level events take place.
+
+    The tracing provider may also be called directly, passing a member of this
+    enum as the first argument, together with the corresponding arguments. The
+    types of the tracepoint arguments are statically defined for each
+    Tracepoint (see `ArgTypeMap._prototypes`)
+
+    Example:
+    ```
+    sys_trace = SystemTracingProvider("mlserver")
+    sys_trace.create_native_sdt_tracepoints(enable_tracepoints=True)
+    ...
+
+    # Fire the model_load_begin tracepoint via its explicit member function:
+    sys_trace.tp_model_load_begin(model_name, model_version)
+
+    # The equivalent generic call, to be used if the tracepoint does not have
+    # an associated tp_* function (typically, during testing and development
+    # of new tracepoints)
+    sys_trace(Tracepoint.model_load_begin, model_name, model_version)
+    ```
+    """
+
+    model_load_begin = auto()
+    model_load_end = auto()
+    model_reload_begin = auto()
+    model_reload_end = auto()
+    model_unload = auto()
+    inference_enqueue_req = auto()
+    inference_begin = auto()
+    inference_end = auto()
+
+    def all():
+        return {t for t in Tracepoint}
+
+    def none():
+        return set()
+
+    def get_arg_types(self) -> Tuple[Optional[List[int]], ArgStatus]:
+        """
+        Returns an (arg_types_list, status) tuple for the current tracepoint.
+        The arg_types_list, when not None, defines the function prototype of
+        the tracepoint.
+
+        The returned arg_types_list is used by the TracingProvider when
+        registering the tracepoints with the underlying native tracepoint
+        implementation (libstapsdt). Based on this, libstapsdt dynamically
+        generates a shared library in ELF format, with one function
+        corresponding to each tracepoint and an ELF note section describing
+        where the tracepoint arguments can be found (in which registers) at
+        call time.
+
+        Tracepoints may have a maximum of MAX_TRACEPOINT_ARGS arguments. If
+        the defined prototype has more, or if the prototype cannot be found,
+        this will be reflected in the returned ArgStatus. Any ArgStatus
+        different from ArgStatus.Ok indicates an error state, although some
+        states such as ArgStatus.TooManyArguments are recoverable (for
+        example, by ignoring the additional arguments).
+
+        The list of arguments used by each tracepoint is currently unstable.
+        """
+        if self in ArgTypeMap._prototypes:
+            if len(ArgTypeMap._prototypes[self]) <= MAX_TRACEPOINT_ARGS:
+                return ArgTypeMap._prototypes[self], ArgStatus.Ok
+            else:
+                return (
+                    ArgTypeMap._prototypes[self][0:MAX_TRACEPOINT_ARGS],
+                    ArgStatus.TooManyArguments,
+                )
+        else:
+            return None, ArgStatus.NoPrototypeDefined
+
+
+ArgTypeMap.model_args = [ArgTypeMap.model_name_t, ArgTypeMap.model_version_t]
+ArgTypeMap.model_args_reload = [
+    ArgTypeMap.model_name_t,
+    ArgTypeMap.model_version_t,
+    ArgTypeMap.old_model_version_t,
+]
+ArgTypeMap.queue_args = [ArgTypeMap.model_name_t, ArgTypeMap.queue_len_t]
+ArgTypeMap.infer_args = [
+    ArgTypeMap.model_name_t,
+    ArgTypeMap.pipeline_t,
+    ArgTypeMap.tags_t,
+    ArgTypeMap.worker_pid_t,
+]
+
+# TODO(lucian): Stabilise the tracepoint argument definitions after writing a
+# number of external applications using them
+#
+# Together with the tracepoint names, the prototypes form the public interface
+# against which external probing code is developed.
+ArgTypeMap._prototypes = {
+    Tracepoint.model_load_begin: ArgTypeMap.model_args,
+    Tracepoint.model_load_end: ArgTypeMap.model_args,
+    Tracepoint.model_reload_begin: ArgTypeMap.model_args_reload,
+    Tracepoint.model_reload_end: ArgTypeMap.model_args,
+    Tracepoint.model_unload: ArgTypeMap.model_args,
+    Tracepoint.inference_enqueue_req: ArgTypeMap.queue_args,
+    Tracepoint.inference_begin: ArgTypeMap.infer_args,
+    Tracepoint.inference_end: ArgTypeMap.infer_args,
+}
diff --git a/mlserver/system_tracing.py b/mlserver/system_tracing.py
new file mode 100644
index 000000000..30bb6d255
--- /dev/null
+++ b/mlserver/system_tracing.py
@@ -0,0 +1,10 @@
+from .sys_tracing import SystemTracingProvider
+
+sysTracingProviderName = "mlserver"
+sys_tracer: SystemTracingProvider = SystemTracingProvider(sysTracingProviderName)
+
+
+def configure_tracepoints(sys_tracer: SystemTracingProvider, enable_tracepoints: bool):
+    if sys_tracer.tracepoints_count == 0:
+        sys_tracer.create_native_sdt_tracepoints(enable_tracepoints)
+    return sys_tracer
diff --git a/poetry.lock b/poetry.lock
index 24cc35f53..de66900db 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3313,7 +3313,7 @@ sqlserver = ["mlflow-dbstore"]
 
 [[package]]
 name = "mlserver-alibi-detect"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "Alibi-Detect runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -3330,7 +3330,7 @@ url = "runtimes/alibi-detect"
 
 [[package]]
 name = "mlserver-alibi-explain"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "Alibi-Explain runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -3348,7 +3348,7 @@ url = "runtimes/alibi-explain"
 
 [[package]]
 name = "mlserver-huggingface"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "HuggingFace runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -3367,7 +3367,7 @@ url = "runtimes/huggingface"
 
 [[package]]
 name = "mlserver-lightgbm"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "LightGBM runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -3385,7 +3385,7 @@ url = "runtimes/lightgbm"
 
 [[package]]
 name = "mlserver-mlflow"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "MLflow runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -3402,7 +3402,7 @@ url = "runtimes/mlflow"
 
 [[package]]
 name = "mlserver-sklearn"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "Scikit-Learn runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -3420,7 +3420,7 @@ url = "runtimes/sklearn"
 
 [[package]]
 name = "mlserver-xgboost"
-version = "1.4.0.dev2"
+version = "1.4.0.dev3"
 description = "XGBoost runtime for MLServer"
 optional = false
 python-versions = "^3.8.1,<3.12"
@@ -6629,6 +6629,16 @@ files = [
 [package.dependencies]
 catalogue = ">=2.0.3,<2.1.0"
 
+[[package]]
+name = "stapsdt"
+version = "0.1.1"
+description = "Create USDT probes and instrument your Python application dynamically"
+optional = true
+python-versions = "*"
+files = [
+    {file = "stapsdt-0.1.1.tar.gz", hash = "sha256:fabc2ca82a7c7f7307bd5326a1a1cace720aa6e1682a53afe09f6cb20218e636"},
+]
+
 [[package]]
 name = "starlette"
 version = "0.27.0"
@@ -8044,7 +8054,10 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"]
 test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 
+[extras]
+tracepoints = ["stapsdt"]
+
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.1,<3.12"
-content-hash = "551a2a87f59f2fc131dee0ee11d90d909ffbb69677f281598683f3af51d9f4b1"
+content-hash = "d51b9f064c500b745d3530d434f454e25ae88df74f4b7129545a5dc1f36ba8ce"
diff --git a/pyproject.toml b/pyproject.toml
index 5f2c0da88..6a053a67b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,6 +59,7 @@ tritonclient = {version = "^2.24", extras = ["http"]}
 aiofiles = "*"
 orjson = "*"
 uvloop = {version = "*", markers = "sys_platform != 'win32' and (sys_platform != 'cygwin' and platform_python_implementation != 'PyPy')"}
+stapsdt = {version = "^0.1.1", optional = true}
 
 ## The importlib-resources backport is required to use some
 ## functionality added in Python 3.10
@@ -69,6 +70,10 @@ opentelemetry-sdk = "^1.18.0"
 opentelemetry-exporter-otlp-proto-grpc = "^1.18.0"
 opentelemetry-instrumentation-grpc = "^0.39b0"
 
+[tool.poetry.extras]
+tracepoints = ["stapsdt"]
+everything = ["stapsdt"]
+
 [tool.poetry.group.dev.dependencies]
 datamodel-code-generator = "0.21.0"
 grpcio-tools = "1.48.1"
diff --git a/tox.ini b/tox.ini
index 7375dcb6f..e3d20c625 100644
--- a/tox.ini
+++ b/tox.ini
@@ -36,7 +36,8 @@ commands =
 commands_pre =
     poetry install --sync --no-root \
         --with all-runtimes \
-        --with all-runtimes-dev
+        --with all-runtimes-dev \
+        --all-extras
 commands =
     pip-licenses \
         --from=mixed \