diff --git a/common/include/arch/x86_64/cpu.h b/common/include/arch/x86_64/cpu.h index ad42bcbab4..f55c7a21fa 100644 --- a/common/include/arch/x86_64/cpu.h +++ b/common/include/arch/x86_64/cpu.h @@ -45,8 +45,6 @@ enum extended_state_sub_leaf { #define PROC_FREQ_LEAF 0x16 #define AMX_TILE_INFO_LEAF 0x1D #define AMX_TMUL_INFO_LEAF 0x1E -#define HYPERVISOR_INFO_LEAF 0x40000000 -#define HYPERVISOR_VMWARE_TIME_LEAF 0x40000010 #define MAX_INPUT_EXT_VALUE_LEAF 0x80000000 #define EXT_SIGNATURE_AND_FEATURES_LEAF 0x80000001 #define CPU_BRAND_LEAF 0x80000002 diff --git a/pal/src/host/linux-sgx/enclave_ocalls.c b/pal/src/host/linux-sgx/enclave_ocalls.c index c7259056e9..43afc62ffb 100644 --- a/pal/src/host/linux-sgx/enclave_ocalls.c +++ b/pal/src/host/linux-sgx/enclave_ocalls.c @@ -1766,47 +1766,85 @@ int ocall_shutdown(int sockfd, int how) { return retval; } -int ocall_gettime(uint64_t* microsec_ptr) { +int ocall_gettime(uint64_t* microsec_ptr, uint64_t* tsc_ptr) { int retval = 0; - struct ocall_gettime* ocall_gettime_args; + struct ocall_gettime* ocall_gettime_args = NULL; void* old_ustack = sgx_prepare_ustack(); ocall_gettime_args = sgx_alloc_on_ustack_aligned(sizeof(*ocall_gettime_args), alignof(*ocall_gettime_args)); if (!ocall_gettime_args) { - sgx_reset_ustack(old_ustack); - return -EPERM; + retval = -EPERM; + goto out; } /* Last seen time value. This guards against time rewinding. */ - static uint64_t last_microsec = 0; - uint64_t last_microsec_before_ocall = __atomic_load_n(&last_microsec, __ATOMIC_ACQUIRE); + struct gettime_guard + { + spinlock_t lock; + uint64_t microsec; + uint64_t tsc; + }; + static struct gettime_guard last_value = { + .lock = INIT_SPINLOCK_UNLOCKED, + .microsec = 0, + .tsc = 0, + }; + + spinlock_lock(&last_value.lock); + uint64_t last_microsec_before_ocall = last_value.microsec; + uint64_t last_tsc_before_ocall = last_value.tsc; + spinlock_unlock(&last_value.lock); + + uint64_t tsc_before_ocall = 0; + uint64_t tsc_after_ocall = 0; do { + tsc_before_ocall = get_tsc(); retval = sgx_exitless_ocall(OCALL_GETTIME, ocall_gettime_args); + tsc_after_ocall = get_tsc(); } while (retval == -EINTR); if (retval < 0 && retval != -EINVAL && retval != -EPERM) { retval = -EPERM; } + if (retval != 0) { + goto out; + } - if (!retval) { - uint64_t microsec = COPY_UNTRUSTED_VALUE(&ocall_gettime_args->microsec); - if (microsec < last_microsec_before_ocall) { - /* Probably a malicious host. */ - log_error("OCALL_GETTIME returned time value smaller than in the previous call"); - _PalProcessExit(1); - } - /* Update `last_microsec`. 
*/ - uint64_t expected_microsec = last_microsec_before_ocall; - while (expected_microsec < microsec) { - if (__atomic_compare_exchange_n(&last_microsec, &expected_microsec, microsec, - /*weak=*/true, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) { - break; - } - } - *microsec_ptr = MAX(microsec, expected_microsec); + /* detect malicious host - time and tsc must monotonically increase */ + uint64_t new_microsec = COPY_UNTRUSTED_VALUE(&ocall_gettime_args->microsec); + uint64_t new_tsc = COPY_UNTRUSTED_VALUE(&ocall_gettime_args->tsc); + if (new_microsec < last_microsec_before_ocall) { + log_error("OCALL_GETTIME returned time value smaller than in the previous call"); + _PalProcessExit(1); } + if (new_tsc <= last_tsc_before_ocall) { + log_error("OCALL_GETTIME returned TSC value smaller than in previous call"); + _PalProcessExit(1); + } + if (!((tsc_before_ocall < new_tsc) && (new_tsc < tsc_after_ocall))) { + log_error("OCALL_GETTIME returned TSC value inconsistent with values taken within the enclave"); + _PalProcessExit(1); + } + + /* Update `last_value` guard. */ + spinlock_lock(&last_value.lock); + if (last_value.tsc < new_tsc) { + last_value.microsec = new_microsec; + last_value.tsc = new_tsc; + } else { + /* there was a more recent ocall */ + new_microsec = last_value.microsec; + new_tsc = last_value.tsc; + } + spinlock_unlock(&last_value.lock); + *microsec_ptr = new_microsec; + if (tsc_ptr != NULL) { + *tsc_ptr = new_tsc; + } + +out: sgx_reset_ustack(old_ustack); return retval; } diff --git a/pal/src/host/linux-sgx/enclave_ocalls.h b/pal/src/host/linux-sgx/enclave_ocalls.h index bf8b05f849..a0fcefc84b 100644 --- a/pal/src/host/linux-sgx/enclave_ocalls.h +++ b/pal/src/host/linux-sgx/enclave_ocalls.h @@ -89,7 +89,7 @@ int ocall_create_process(size_t nargs, const char** args, uintptr_t (*reserved_m int ocall_futex(uint32_t* uaddr, int op, int val, uint64_t* timeout_us); -int ocall_gettime(uint64_t* microsec); +int ocall_gettime(uint64_t* microsec, uint64_t* tsc); void ocall_sched_yield(void); diff --git a/pal/src/host/linux-sgx/host_ocalls.c b/pal/src/host/linux-sgx/host_ocalls.c index eba742e55c..fdf1fa9aa0 100644 --- a/pal/src/host/linux-sgx/host_ocalls.c +++ b/pal/src/host/linux-sgx/host_ocalls.c @@ -603,8 +603,10 @@ static long sgx_ocall_shutdown(void* args) { static long sgx_ocall_gettime(void* args) { struct ocall_gettime* ocall_gettime_args = args; struct timeval tv; + uint64_t tsc = get_tsc(); DO_SYSCALL(gettimeofday, &tv, NULL); - ocall_gettime_args->microsec = tv.tv_sec * (uint64_t)1000000 + tv.tv_usec; + ocall_gettime_args->microsec = tv.tv_sec * (uint64_t)1000000UL + tv.tv_usec; + ocall_gettime_args->tsc = tsc; return 0; } diff --git a/pal/src/host/linux-sgx/meson.build b/pal/src/host/linux-sgx/meson.build index e10defc768..347ad98924 100644 --- a/pal/src/host/linux-sgx/meson.build +++ b/pal/src/host/linux-sgx/meson.build @@ -86,6 +86,7 @@ libpal_sgx = shared_library('pal', 'pal_sockets.c', 'pal_streams.c', 'pal_threading.c', + 'utils/fast_clock.c', pal_sgx_asm_offsets_h, pal_common_sources, pal_linux_common_sources_enclave, diff --git a/pal/src/host/linux-sgx/pal_exception.c b/pal/src/host/linux-sgx/pal_exception.c index 6bc890bdc4..7d38aa3229 100644 --- a/pal/src/host/linux-sgx/pal_exception.c +++ b/pal/src/host/linux-sgx/pal_exception.c @@ -111,11 +111,14 @@ static void save_pal_context(PAL_CONTEXT* ctx, sgx_cpu_context_t* uc, } } +#include "utils/fast_clock.h" + +int g_atomic_is_rdtsc_emulated = 0; static void emulate_rdtsc_and_print_warning(sgx_cpu_context_t* uc) { if (FIRST_TIME()) 
{ - /* if we end up emulating RDTSC/RDTSCP instruction, we cannot use invariant TSC */ - extern uint64_t g_tsc_hz; - g_tsc_hz = 0; + /* if we end up emulating RDTSC/RDTSCP instruction, we cannot use TSC-based clock emulation */ + __atomic_store_n(&g_atomic_is_rdtsc_emulated, 1, __ATOMIC_SEQ_CST); + fast_clock_disable(&g_fast_clock); log_warning("all RDTSC/RDTSCP instructions are emulated (imprecisely) via gettime() " "syscall."); } diff --git a/pal/src/host/linux-sgx/pal_linux.h b/pal/src/host/linux-sgx/pal_linux.h index 914d75f1a4..50a2555b03 100644 --- a/pal/src/host/linux-sgx/pal_linux.h +++ b/pal/src/host/linux-sgx/pal_linux.h @@ -99,8 +99,6 @@ void _PalExceptionHandler(uint32_t trusted_exit_info_, uint32_t untrusted_external_event, sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state, sgx_arch_exinfo_t* exinfo); -void init_tsc(void); - int init_cpuid(void); int init_enclave(void); diff --git a/pal/src/host/linux-sgx/pal_main.c b/pal/src/host/linux-sgx/pal_main.c index a5d32f47f7..3257e6dcff 100644 --- a/pal/src/host/linux-sgx/pal_main.c +++ b/pal/src/host/linux-sgx/pal_main.c @@ -31,6 +31,7 @@ #include "pal_topology.h" #include "toml.h" #include "toml_utils.h" +#include "utils/fast_clock.h" struct pal_linuxsgx_state g_pal_linuxsgx_state; @@ -407,7 +408,6 @@ static int import_and_init_extra_runtime_domain_names(struct pal_dns_host_conf* extern void* g_enclave_base; extern void* g_enclave_top; extern bool g_allowed_files_warn; -extern uint64_t g_tsc_hz; extern size_t g_unused_tcs_pages_num; static int print_warnings_on_insecure_configs(PAL_HANDLE parent_process) { @@ -552,11 +552,17 @@ static int print_warnings_on_insecure_configs(PAL_HANDLE parent_process) { return ret; } -static void print_warning_on_invariant_tsc(PAL_HANDLE parent_process) { - if (!parent_process && !g_tsc_hz) { - /* Warn only in the first process. */ - log_warning("Could not set up Invariant TSC (CPU is too old or you run on a VM that does " - "not expose corresponding CPUID leaves). This degrades performance."); +static void print_warnings_on_disabled_clock_emulation(PAL_HANDLE parent_process) { + if (parent_process) { + return; /* Warn only in the first process */ + } + + /* We call get_tsc() early in pal_linux_main - + * if rdtsc opcode is emulated, the error handler disables fast-clock + */ + if (!fast_clock_is_enabled(&g_fast_clock)) { + log_warning("Could not enable fast clock emulation (CPU is too old or VM does " + "not support TSC within SGX enclave). This degrades performance."); } } @@ -581,8 +587,7 @@ static void post_callback(void) { ocall_exit(1, /*is_exitgroup=*/true); } - print_warning_on_invariant_tsc(g_pal_common_state.parent_process); - + print_warnings_on_disabled_clock_emulation(g_pal_common_state.parent_process); print_warnings_on_invalid_dns_host_conf(g_pal_common_state.parent_process); } @@ -725,12 +730,11 @@ noreturn void pal_linux_main(void* uptr_libpal_uri, size_t libpal_uri_len, void* SET_ENCLAVE_TCB(ready_for_exceptions, 1UL); - /* initialize "Invariant TSC" HW feature for fast and accurate gettime and immediately probe - * RDTSC instruction inside SGX enclave (via dummy get_tsc) -- it is possible that - * the CPU supports invariant TSC but doesn't support executing RDTSC inside SGX enclave, in - * this case the SIGILL exception is generated and leads to emulate_rdtsc_and_print_warning() - * which unsets invariant TSC, and we end up falling back to the slower ocall_gettime() */ - init_tsc(); + /* We implement a "fast-path" clock that is emulated internally using x86 RDTSC instruction. 
+ * It is possible that the CPU does not support the RDTSC instruction within SGX enclave, + * in this case the SIGILL exception is generated and leads to emulate_rdtsc_and_print_warning() + * which disables the TSC based clock, and we end up falling back to the slower ocall_gettime() + */ (void)get_tsc(); /* must be after `ready_for_exceptions=1` since it may generate SIGILL */ ret = init_cpuid(); diff --git a/pal/src/host/linux-sgx/pal_misc.c b/pal/src/host/linux-sgx/pal_misc.c index d86a1b21e8..d2b118b337 100644 --- a/pal/src/host/linux-sgx/pal_misc.c +++ b/pal/src/host/linux-sgx/pal_misc.c @@ -22,208 +22,10 @@ #include "spinlock.h" #include "toml_utils.h" #include "topo_info.h" - -/* The timeout of 50ms was found to be a safe TSC drift correction periodicity based on results - * from multiple systems. Any higher or lower could pose risks of negative time drift or - * performance hit respectively. - */ -#define TSC_REFINE_INIT_TIMEOUT_USECS 50000 - -uint64_t g_tsc_hz = 0; /* TSC frequency for fast and accurate time ("invariant TSC" HW feature) */ -static uint64_t g_start_tsc = 0; -static uint64_t g_start_usec = 0; -static seqlock_t g_tsc_lock = INIT_SEQLOCK_UNLOCKED; - -static bool is_tsc_usable(void) { - uint32_t words[CPUID_WORD_NUM]; - _PalCpuIdRetrieve(INVARIANT_TSC_LEAF, 0, words); - return words[CPUID_WORD_EDX] & (1 << 8); -} - -/* return TSC frequency or 0 if invariant TSC is not supported */ -static uint64_t get_tsc_hz_baremetal(void) { - uint32_t words[CPUID_WORD_NUM]; - - /* - * Based on "Time Stamp Counter and Nominal Core Crystal Clock Information" leaf, calculate TSC - * frequency as ECX * EBX / EAX, where - * - EAX is denominator of the TSC/"core crystal clock" ratio, - * - EBX is numerator of the TSC/"core crystal clock" ratio, - * - ECX is core crystal clock (nominal) frequency in Hz. - */ - _PalCpuIdRetrieve(TSC_FREQ_LEAF, 0, words); - if (!words[CPUID_WORD_EAX] || !words[CPUID_WORD_EBX]) { - /* TSC/core crystal clock ratio is not enumerated, can't use RDTSC for accurate time */ - return 0; - } - - if (words[CPUID_WORD_ECX] > 0) { - /* cast to 64-bit first to prevent integer overflow */ - return (uint64_t)words[CPUID_WORD_ECX] * words[CPUID_WORD_EBX] / words[CPUID_WORD_EAX]; - } - - /* some Intel CPUs do not report nominal frequency of crystal clock, let's calculate it - * based on Processor Frequency Information Leaf (CPUID 16H); this leaf always exists if - * TSC Frequency Leaf exists; logic is taken from Linux 5.11's arch/x86/kernel/tsc.c */ - _PalCpuIdRetrieve(PROC_FREQ_LEAF, 0, words); - if (!words[CPUID_WORD_EAX]) { - /* processor base frequency (in MHz) is not enumerated, can't calculate frequency */ - return 0; - } - - /* processor base frequency is in MHz but we need to return TSC frequency in Hz; cast to 64-bit - * first to prevent integer overflow */ - return (uint64_t)words[CPUID_WORD_EAX] * 1000000; -} - -/* return TSC frequency or 0 if invariant TSC is not supported */ -static uint64_t get_tsc_hz_hypervisor(void) { - uint32_t words[CPUID_WORD_NUM]; - - /* - * We rely on the Generic CPUID space for hypervisors: - * - 0x40000000: EAX: The maximum input value for CPUID supported by the hypervisor - * - EBX, ECX, EDX: Hypervisor vendor ID signature (hypervisor_id) - * - * If we detect QEMU/KVM or Cloud Hypervisor/KVM (hypervisor_id = "KVMKVMKVM") or VMWare - * ("VMwareVMware"), then we assume that leaf 0x40000010 contains virtual TSC frequency in kHz - * in EAX. We check hypervisor_id because leaf 0x40000010 is not standardized and e.g. 
Microsoft - * Hyper-V may use it for other purposes. - * - * Relevant materials: - * - https://github.com/qemu/qemu/commit/9954a1582e18b03ddb66f6c892dccf2c3508f4b2 - * - qemu/target/i386/cpu.h, qemu/target/i386/cpu.c, qemu/target/i386/kvm/kvm.c sources - * - https://github.com/freebsd/freebsd-src/blob/9df6eea/sys/x86/x86/identcpu.c#L1372-L1377 (for - * the list of hypervisor_id values) - */ - _PalCpuIdRetrieve(HYPERVISOR_INFO_LEAF, 0, words); - - bool is_kvm = words[CPUID_WORD_EBX] == 0x4b4d564b - && words[CPUID_WORD_ECX] == 0x564b4d56 - && words[CPUID_WORD_EDX] == 0x0000004d; - bool is_vmware = words[CPUID_WORD_EBX] == 0x61774d56 - && words[CPUID_WORD_ECX] == 0x4d566572 - && words[CPUID_WORD_EDX] == 0x65726177; - - if (!is_kvm && !is_vmware) { - /* not a hypervisor that contains "virtual TSC frequency" in leaf 0x40000010 */ - return 0; - } - - if (words[CPUID_WORD_EAX] < HYPERVISOR_VMWARE_TIME_LEAF) { - /* virtual TSC frequency is not available */ - return 0; - } - - _PalCpuIdRetrieve(HYPERVISOR_VMWARE_TIME_LEAF, 0, words); - if (!words[CPUID_WORD_EAX]) { - /* TSC frequency (in kHz) is not enumerated, can't calculate frequency */ - return 0; - } - - /* TSC frequency is in kHz but we need to return TSC frequency in Hz; cast to 64-bit first to - * prevent integer overflow */ - return (uint64_t)words[CPUID_WORD_EAX] * 1000; -} - -/* initialize the data structures used for date/time emulation using TSC */ -void init_tsc(void) { - if (!is_tsc_usable()) - return; - - g_tsc_hz = get_tsc_hz_baremetal(); - if (g_tsc_hz) - return; - - /* hypervisors may not expose crystal-clock frequency CPUID leaves, so instead try - * hypervisor-special synthetic CPUID leaf 0x40000010 (VMWare-style Timing Information) */ - g_tsc_hz = get_tsc_hz_hypervisor(); - if (g_tsc_hz) - return; -} +#include "utils/fast_clock.h" int _PalSystemTimeQuery(uint64_t* out_usec) { - int ret; - - if (!g_tsc_hz) { - /* RDTSC is not allowed or no Invariant TSC feature -- fallback to the slow ocall */ - return ocall_gettime(out_usec); - } - - uint32_t seq; - uint64_t start_tsc; - uint64_t start_usec; - do { - seq = read_seqbegin(&g_tsc_lock); - start_tsc = g_start_tsc; - start_usec = g_start_usec; - } while (read_seqretry(&g_tsc_lock, seq)); - - uint64_t usec = 0; - /* Last seen RDTSC-calculated time value. This guards against time rewinding. */ - static uint64_t last_usec = 0; - if (start_tsc > 0 && start_usec > 0) { - /* baseline TSC/usec pair was initialized, can calculate time via RDTSC (but should be - * careful with integer overflow during calculations) */ - uint64_t diff_tsc = get_tsc() - start_tsc; - if (diff_tsc < UINT64_MAX / 1000000) { - uint64_t diff_usec = diff_tsc * 1000000 / g_tsc_hz; - if (diff_usec < TSC_REFINE_INIT_TIMEOUT_USECS) { - /* less than TSC_REFINE_INIT_TIMEOUT_USECS passed from the previous update of - * TSC/usec pair (time drift is contained), use the RDTSC-calculated time */ - usec = start_usec + diff_usec; - if (usec < start_usec) - return -PAL_ERROR_OVERFLOW; - - /* It's simply `last_usec = max(last_usec, usec)`, but executed atomically. 
*/ - uint64_t expected_usec = __atomic_load_n(&last_usec, __ATOMIC_ACQUIRE); - while (expected_usec < usec) { - if (__atomic_compare_exchange_n(&last_usec, &expected_usec, usec, - /*weak=*/true, __ATOMIC_RELEASE, - __ATOMIC_ACQUIRE)) { - break; - } - } - - *out_usec = MAX(usec, expected_usec); - return 0; - } - } - } - - /* if we are here, either the baseline TSC/usec pair was not yet initialized or too much time - * passed since the previous TSC/usec update, so let's refresh them to contain the time drift */ - uint64_t tsc_cyc1 = get_tsc(); - ret = ocall_gettime(&usec); - if (ret < 0) - return -PAL_ERROR_DENIED; - uint64_t tsc_cyc2 = get_tsc(); - - uint64_t last_recorded_rdtsc = __atomic_load_n(&last_usec, __ATOMIC_ACQUIRE); - if (usec < last_recorded_rdtsc) { - /* new OCALL-obtained timestamp (`usec`) is "back in time" than the last recorded timestamp - * from RDTSC (`last_recorded_rdtsc`); this can happen if the actual host time drifted - * backwards compared to the RDTSC time. */ - usec = last_recorded_rdtsc; - } - - /* we need to match the OCALL-obtained timestamp (`usec`) with the RDTSC-obtained number of - * cycles (`tsc_cyc`); since OCALL is a time-consuming operation, we estimate `tsc_cyc` as a - * mid-point between the RDTSC values obtained right-before and right-after the OCALL. */ - uint64_t tsc_cyc = tsc_cyc1 + (tsc_cyc2 - tsc_cyc1) / 2; - if (tsc_cyc < tsc_cyc1) - return -PAL_ERROR_OVERFLOW; - - /* refresh the baseline data if no other thread updated g_start_tsc */ - write_seqbegin(&g_tsc_lock); - if (g_start_tsc < tsc_cyc) { - g_start_tsc = tsc_cyc; - g_start_usec = usec; - } - write_seqend(&g_tsc_lock); - - *out_usec = usec; - return 0; + return fast_clock_get_time(&g_fast_clock, out_usec); } static uint32_t g_extended_feature_flags_max_supported_sub_leaves = 0; @@ -512,10 +314,8 @@ static const struct cpuid_leaf cpuid_known_leaves[] = { {.leaf = 0x1F, .zero_subleaf = false, .cache = false}, /* Intel V2 Ext Topology Enumeration */ /* basic CPUID leaf functions end here */ - /* hypervisor-specific CPUID leaf functions (0x40000000 - 0x400000FF) start here */ - {.leaf = 0x40000000, .zero_subleaf = true, .cache = true}, /* CPUID Info */ - {.leaf = 0x40000010, .zero_subleaf = true, .cache = true}, /* VMWare-style Timing Info */ - /* NOTE: currently only the above two leaves are used, see also get_tsc_hz_hypervisor() */ + /* hypervisor-specific CPUID leaf functions (0x40000000 - 0x400000FF) */ + /* not used, see code below */ /* invalid CPUID leaf functions (no existing or future CPU will return any meaningful * information in these leaves) occupy 0x40000100 - 0x4FFFFFFF -- they are treated the same as diff --git a/pal/src/host/linux-sgx/pal_ocall_types.h b/pal/src/host/linux-sgx/pal_ocall_types.h index 793282c81e..20d3a1e44e 100644 --- a/pal/src/host/linux-sgx/pal_ocall_types.h +++ b/pal/src/host/linux-sgx/pal_ocall_types.h @@ -294,6 +294,7 @@ struct ocall_shutdown { struct ocall_gettime { uint64_t microsec; + uint64_t tsc; }; struct ocall_poll { diff --git a/pal/src/host/linux-sgx/utils/fast_clock.c b/pal/src/host/linux-sgx/utils/fast_clock.c new file mode 100644 index 0000000000..f9b202616f --- /dev/null +++ b/pal/src/host/linux-sgx/utils/fast_clock.c @@ -0,0 +1,364 @@ +#include "api.h" +#include "cpu.h" +#include "enclave_ocalls.h" +#include "pal_internal.h" +#include "utils/fast_clock.h" + +/** + * FastClock + * + * The purpose of this module is to provide a fast sgx implementation for gettimeofday(). + * - What this does: avoids OCALL on every gettimeofday() invocation. 
Given a "ground truth" + * timepoint, we can calculate the current time directly inside the enclave. + * - What this doesn't do: this solution does *NOT* provide a trusted time implementation. + * This still relies on the untrusted host time. + * + * In order to calculate the current time inside the enclave, we need the following: + * 1. t0 - a point in time that all fast clock times will be calculated against. + * 2. tsc0 - the clock cycle counter for that point in time + * 3. clock_frequency - how many clock cycles do we have per second. The tsc value is synced + * between all cores. + * Using the above, given the current tsc we can calculate the current time. + * + * Note: old SGX enclaves (prior to SGX2) do not support using the `rdtsc` opcode to read the TSC. + * + * *** Implementation *** + * + * FastClock is implemented as a state machine. This was done since we don't have a good portable + * way to get the cpu clock frequency. So, our general strategy is to simply "calculate" it, by + * comparing two timeval values and their corresponding tsc values. + * + * The naive way of making this calculation is to take two timepoints during initialization with a + * "sleep" in between. Instead, we're letting the program run "organically", and using the time + * that passes between calls to gettimeofday() as our sleep. This means FastClock will perform an + * OCALL when needed, and calculate the time internally when it can. + * + * FastClock has the following states: + * + * INIT┌─►CALIBRATING┌─►RDTSC──►RDTSC_RECALIBRATE─┐ + * │ │ │ + * └─►DISABLED └────────────────────────────┘ + * + * 1. INIT - this is the initial state for fast_clock. All calls are OCALLs + * a. check if rdtsc() is allowed from sgx within the current enclave (-> DISABLED otherwise) + * b. take the initial t0 and tsc0 values used for calibration (-> CALIBRATING) + * 2. DISABLED - slow path, all calls will be OCALLs + * 3. CALIBRATING - wait for some time to pass so we can calculate the clock_frequency + * a. OCALL to get the current time + * b. if "enough" time has passed since t0, we calculate clock_frequency (-> RDTSC) + * 4. RDTSC - fast path, time calculation is done within the enclave + * a. calculate the current time by using clock_frequency, t0 and the tsc value taken within + * the enclave. + * b. if a "long" time has passed since we last synced with the host, OCALL to get new values + * for t0 and tsc0 to reduce divergence (-> RDTSC_RECALIBRATING) + * 5. RDTSC_RECALIBRATE - similar to CALIBRATING, calculate an updated clock_frequency + * a. since we have a previous calculation of clock_frequency, we still use the "fast path" to + * calculate the time within the enclave. + * b. when enough time has passed to re-calculate the frequency, we OCALL to get a second + * "ground truth" and calculate a new clock_frequency (-> RDTSC) + * + * *** Thread safety *** + * + * As far as multithreading goes, we had the following goals. We wanted the solution to give + * consistent times between all threads. This means FastClock state can't be thread local, and + * needs to be thread safe. And since this is a performance optimization, we need this to be + * lockless (and definitely no OCALLs other than gettimeofday). + * + * To achieve the above, we use the following data structures. + * + * 1. fast_clock_timepoint - this contains all the internal state needed by FastClock to + * calculate the time as discussed above. FastClock internally has *two* timepoints, which + * are used in round-robin (alternating). + * 2. 
fast_clock_desc - this is read and written to atomically, which is how the lockless + * thread safety is implemented. The descriptor contains: + * - The current "state" of the FastClock state machine. + * - The round-robin index of the timepoint that is currently in use. + * - A flag that guards state transitions, in case of concurrent calls only a single thread + * should calculate the new timepoint data and transition the state. + * + * By using an atomic descriptor and round-robin timepoints, we can make sure only a single thread + * is changing the timepoint values, and no one can read "intermediate" state. We will only store + * the new descriptor pointing to the "next" state and timepoint after it's usable. + * + * Note: in theory this is not thread safe, as we can have the following - + * 1. Thread A reads descriptor, starts flow using timepoint #0, then context switch. + * 2. Some time passes and we transition to timepoint #1. + * 3. Some more time passes and we transition back to timepoint #0. + * 4. Thread A wakes up and reads inconsistent state in timepoint #0. At the worst case this might + * lead to negative \ max time. + * In practice this will never happen, since a long time passes between transitioning timepoints. + */ + + +/** + * We got these values experimentally (on azure dc#sv3 machines, SGX2 secure compute) - + * 1. increasing CALIBRATION_TIME beyond 1sec doesn't increase the accuracy of the calculated + * clock frequency or times, + * 2. 120 seconds keeps the time-drift with host time typically in the 50us range, and very rarely + * at the 1ms range. + * + * Note, time drift can vary, "ground truth" values can be "bad" and offset the calculation. This is + * true regardless of the numbers we choose or the implementation (as long as we rely on OCALLing to + * tell the time). The recalibration interval is used to offset this. + */ +#define RDTSC_CALIBRATION_TIME ((uint64_t)1 * TIME_US_IN_S) +#define RDTSC_RECALIBRATION_INTERVAL ((uint64_t)120 * TIME_US_IN_S) + +typedef enum +{ + FC_STATE_RDTSC, + FC_STATE_RDTSC_RECALIBRATE, + FC_STATE_CALIBRATING, + FC_STATE_INIT, + + FC_STATE_RDTSC_DISABLED, +} fast_clock_state; + +fast_clock g_fast_clock = { + .atomic_descriptor = { + .state = FC_STATE_INIT, + .timepoint_index = 0, + .state_changing = 0, + }, + .time_points = { [0 ... FC_NUM_TIMEPOINTS-1] = { + .clock_freq = 0, + .tsc0 = 0, + .t0_usec = 0, + .expiration_usec = 0, + }} +}; + + +static inline fast_clock_desc advance_state(fast_clock_desc curr, fast_clock_state new_state, bool advance_timepoint) +{ + fast_clock_desc new_descriptor = { + .state = new_state, + .timepoint_index = advance_timepoint ? 
curr.timepoint_index + 1 : curr.timepoint_index, + .state_changing = 0, + }; + return new_descriptor; +} + +static inline bool is_expired(const fast_clock_timepoint* timepoint, uint64_t now_usec) +{ + return (timepoint->expiration_usec < now_usec); +} + +static inline void calc_time(const fast_clock_timepoint* timepoint, uint64_t* time_usec) +{ + uint64_t tsc = get_tsc(); + uint64_t dtsc = tsc - timepoint->tsc0; + uint64_t dt_usec = (dtsc * TIME_US_IN_S) / timepoint->clock_freq; + *time_usec = timepoint->t0_usec + dt_usec; +} + +static inline void reset_clock_frequency(fast_clock_timepoint* timepoint, uint64_t tsc, uint64_t time_usec) +{ + // calculate clock frequency in Hz + uint64_t dt_usec = time_usec - timepoint->t0_usec; + uint64_t dtsc = tsc - timepoint->tsc0; + timepoint->clock_freq = (dtsc * TIME_US_IN_S) / dt_usec; +} + +static inline long reset_timepoint(fast_clock_timepoint* timepoint) +{ + int ret = ocall_gettime(&timepoint->t0_usec, &timepoint->tsc0); + return ret; +} + +static inline void reset_expiration(fast_clock_timepoint* timepoint, uint64_t next_expiration) +{ + timepoint->expiration_usec = timepoint->t0_usec + next_expiration; +} + +static inline bool set_change_state_guard(fast_clock* fast_clock, fast_clock_desc descriptor) +{ + if (descriptor.state_changing != 0) { + return false; + } + + fast_clock_desc state_change_guard_desc = descriptor; + state_change_guard_desc.state_changing = 1; + return __atomic_compare_exchange_n( + &fast_clock->atomic_descriptor.desc, &descriptor.desc, state_change_guard_desc.desc, + /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + ); +} + +static inline fast_clock_timepoint* get_timepoint(fast_clock* fast_clock, fast_clock_desc descriptor) +{ + return &fast_clock->time_points[descriptor.timepoint_index]; +} + +static bool is_rdtsc_available(void) { + // we just need to check if rdtsc opcode is emulated (otherwise using it is really slow) - + extern int g_atomic_is_rdtsc_emulated; + + // "optimistic path", assume rdtsc() was called at least once + int is_emulated = __atomic_load_n(&g_atomic_is_rdtsc_emulated, __ATOMIC_SEQ_CST); + if (is_emulated) { + return false; + } + + // make sure the is_emulated guard is initialized + (void)get_tsc(); + is_emulated = __atomic_load_n(&g_atomic_is_rdtsc_emulated, __ATOMIC_SEQ_CST); + return !is_emulated; +} + +static int handle_state_rdtsc_disabled(uint64_t* time_usec) +{ + // slow path - OCALL to get time + return ocall_gettime(time_usec, NULL); +} + +static int handle_state_init(fast_clock* fast_clock, fast_clock_desc descriptor, uint64_t* time_usec) +{ + if (!set_change_state_guard(fast_clock, descriptor)) { + return handle_state_rdtsc_disabled(time_usec); + } + + if (!is_rdtsc_available()) { + fast_clock_desc next_desc = advance_state(descriptor, FC_STATE_RDTSC_DISABLED, false); + __atomic_store_n(&fast_clock->atomic_descriptor.desc, next_desc.desc, __ATOMIC_RELAXED); + return handle_state_rdtsc_disabled(time_usec); + } + + fast_clock_desc next_desc = advance_state(descriptor, FC_STATE_CALIBRATING, false); + fast_clock_timepoint* timepoint = get_timepoint(fast_clock, next_desc); + int ret = reset_timepoint(timepoint); + + // gettimeofday failed - restore descriptor + if (ret != 0) { + __atomic_store_n(&fast_clock->atomic_descriptor.desc, descriptor.desc, __ATOMIC_RELAXED); + return ret; + } + + // advance state + reset_expiration(timepoint, RDTSC_CALIBRATION_TIME); + __atomic_store_n(&fast_clock->atomic_descriptor.desc, next_desc.desc, __ATOMIC_RELEASE); + + // output results from the timepoint 
+    *time_usec = timepoint->t0_usec;
+    return ret;
+}
+
+static int handle_state_calibrating(fast_clock* fast_clock, fast_clock_desc descriptor, uint64_t* time_usec)
+{
+    // all callers in this state will perform an OCALL - no need to set the change_state_guard before OCALLing
+    uint64_t tmp_tsc = 0;
+    int ret = ocall_gettime(time_usec, &tmp_tsc);
+    if (ret != 0) {
+        return ret;
+    }
+
+    fast_clock_timepoint* timepoint = get_timepoint(fast_clock, descriptor);
+    if (!is_expired(timepoint, *time_usec) || !set_change_state_guard(fast_clock, descriptor)) {
+        return ret;
+    }
+
+    // calculate the clock_freq and advance state
+    reset_clock_frequency(timepoint, tmp_tsc, *time_usec);
+    reset_expiration(timepoint, RDTSC_RECALIBRATION_INTERVAL);
+    fast_clock_desc new_desc = advance_state(descriptor, FC_STATE_RDTSC, false);
+    __atomic_store_n(&fast_clock->atomic_descriptor.desc, new_desc.desc, __ATOMIC_RELEASE);
+
+    return ret;
+}
+
+static inline int handle_state_rdtsc(fast_clock* fast_clock, fast_clock_desc descriptor, uint64_t* time_usec)
+{
+    fast_clock_timepoint* timepoint = get_timepoint(fast_clock, descriptor);
+
+    // fast path - calculate time with rdtsc
+    calc_time(timepoint, time_usec);
+    bool should_advance = is_expired(timepoint, *time_usec);
+    if (!should_advance || !set_change_state_guard(fast_clock, descriptor)) {
+        return 0;
+    }
+
+    // we acquired the state_change_guard - prepare the next state (get a new ground truth timepoint)
+    fast_clock_desc next_desc = advance_state(descriptor, FC_STATE_RDTSC_RECALIBRATE, true);
+    fast_clock_timepoint* next_timepoint = get_timepoint(fast_clock, next_desc);
+
+    int ret = reset_timepoint(next_timepoint);
+    if (ret != 0) {
+        // gettimeofday failed - restore the descriptor to release the state_change_guard and return
+        __atomic_store_n(&fast_clock->atomic_descriptor.desc, descriptor.desc, __ATOMIC_RELAXED);
+        return ret;
+    }
+
+    // use the current clock freq until the RDTSC_RECALIBRATE state ends and the new clock_freq can be calculated
+    next_timepoint->clock_freq = timepoint->clock_freq;
+    reset_expiration(next_timepoint, RDTSC_CALIBRATION_TIME);
+    __atomic_store_n(&fast_clock->atomic_descriptor.desc, next_desc.desc, __ATOMIC_RELEASE);
+
+    return ret;
+}
+
+static inline int handle_state_rdtsc_recalibrate(fast_clock* fast_clock, fast_clock_desc descriptor, uint64_t* time_usec)
+{
+    fast_clock_timepoint* timepoint = get_timepoint(fast_clock, descriptor);
+
+    // fast path - calculate time with rdtsc
+    calc_time(timepoint, time_usec);
+    if (!is_expired(timepoint, *time_usec) || !set_change_state_guard(fast_clock, descriptor)) {
+        return 0;
+    }
+
+    uint64_t tsc = 0;
+    int ret = ocall_gettime(time_usec, &tsc);
+    if (ret != 0) {
+        __atomic_store_n(&fast_clock->atomic_descriptor.desc, descriptor.desc, __ATOMIC_RELAXED);
+        return ret;
+    }
+
+    reset_clock_frequency(timepoint, tsc, *time_usec);
+    reset_expiration(timepoint, RDTSC_RECALIBRATION_INTERVAL);
+    fast_clock_desc next_desc = advance_state(descriptor, FC_STATE_RDTSC, false);
+    __atomic_store_n(&fast_clock->atomic_descriptor.desc, next_desc.desc, __ATOMIC_RELEASE);
+
+    return ret;
+}
+
+int fast_clock_get_time(fast_clock* fast_clock, uint64_t* time_usec)
+{
+    fast_clock_desc descriptor = {
+        .desc = __atomic_load_n(&fast_clock->atomic_descriptor.desc, __ATOMIC_ACQUIRE),
+    };
+    switch (descriptor.state)
+    {
+    case FC_STATE_RDTSC:
+        return handle_state_rdtsc(fast_clock, descriptor, time_usec);
+    case FC_STATE_RDTSC_RECALIBRATE:
+        return handle_state_rdtsc_recalibrate(fast_clock, descriptor, time_usec);
+    case FC_STATE_CALIBRATING:
+        return handle_state_calibrating(fast_clock, descriptor, time_usec);
+    case FC_STATE_INIT:
+        return handle_state_init(fast_clock, descriptor, time_usec);
+    case FC_STATE_RDTSC_DISABLED:
+    default:
+        return handle_state_rdtsc_disabled(time_usec);
+    }
+}
+
+bool fast_clock_is_enabled(const fast_clock* fast_clock)
+{
+    fast_clock_desc descriptor = {
+        .desc = __atomic_load_n(&fast_clock->atomic_descriptor.desc, __ATOMIC_RELAXED),
+    };
+    return (descriptor.state != FC_STATE_RDTSC_DISABLED);
+}
+
+void fast_clock_disable(fast_clock* fast_clock)
+{
+    /* We need to busy-loop here until the state change guard is acquired, since fast-clock
+     * might be in the midst of transitioning states. We can't simply store the DISABLED state. */
+    fast_clock_desc descriptor;
+    do {
+        descriptor.desc = __atomic_load_n(&fast_clock->atomic_descriptor.desc, __ATOMIC_ACQUIRE);
+    } while (!set_change_state_guard(fast_clock, descriptor));
+
+    fast_clock_desc disabled_desc = advance_state(descriptor, FC_STATE_RDTSC_DISABLED, false);
+    __atomic_store_n(&fast_clock->atomic_descriptor.desc, disabled_desc.desc, __ATOMIC_RELEASE);
+}
diff --git a/pal/src/host/linux-sgx/utils/fast_clock.h b/pal/src/host/linux-sgx/utils/fast_clock.h
new file mode 100644
index 0000000000..6d85452101
--- /dev/null
+++ b/pal/src/host/linux-sgx/utils/fast_clock.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "api.h"
+
+
+#define _FC_NUM_TIMEPOINT_BITS (1)
+#define FC_NUM_TIMEPOINTS (1<<_FC_NUM_TIMEPOINT_BITS)
+
+typedef union
+{
+    struct
+    {
+        uint16_t state : 4;
+        uint16_t timepoint_index : _FC_NUM_TIMEPOINT_BITS;
+        uint16_t _pad0 : (16 - _FC_NUM_TIMEPOINT_BITS - 5);
+        uint16_t state_changing : 1;
+    };
+
+    uint16_t desc;
+} fast_clock_desc;
+
+static_assert(_FC_NUM_TIMEPOINT_BITS >= 1, "timepoint_index must have at minimum 1-bit");
+static_assert(_FC_NUM_TIMEPOINT_BITS + 5 <= 16, "timepoint_index uses too many bits");
+static_assert(sizeof(fast_clock_desc) == sizeof(uint16_t), "fast_clock_desc size mismatch");
+
+typedef struct
+{
+    uint64_t clock_freq;
+    uint64_t tsc0;
+    uint64_t t0_usec;
+    uint64_t expiration_usec;
+} fast_clock_timepoint;
+
+typedef struct
+{
+    fast_clock_desc atomic_descriptor;
+    fast_clock_timepoint time_points[FC_NUM_TIMEPOINTS];
+} fast_clock;
+
+extern fast_clock g_fast_clock;
+
+int fast_clock_get_time(fast_clock* fast_clock, uint64_t* time_usec);
+bool fast_clock_is_enabled(const fast_clock* fast_clock);
+void fast_clock_disable(fast_clock* fast_clock);
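
To make the calibration arithmetic described in the fast_clock.c header comment concrete, here is a minimal user-space sketch of the same math that reset_clock_frequency() and calc_time() perform. It is illustrative only and is not part of the patch: clock_gettime() stands in for ocall_gettime(), __rdtsc() for get_tsc(), and a fixed 100ms sleep for the time that naturally passes between gettimeofday() calls while FastClock is calibrating; all names below are hypothetical.

/* calibration sketch (hypothetical names, runs outside Gramine/SGX) */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <x86intrin.h>

#define US_IN_S 1000000ULL

static uint64_t host_time_usec(void) {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);   /* plays the role of ocall_gettime() */
    return (uint64_t)ts.tv_sec * US_IN_S + (uint64_t)ts.tv_nsec / 1000;
}

int main(void) {
    /* first "ground truth": host time and TSC sampled together (reset_timepoint() equivalent) */
    uint64_t t0_usec = host_time_usec();
    uint64_t tsc0    = __rdtsc();

    /* let some time pass, like the CALIBRATING state does organically */
    struct timespec delay = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
    nanosleep(&delay, NULL);

    /* second ground truth -> clock frequency in Hz: dtsc * 1e6 / dt_usec (reset_clock_frequency()) */
    uint64_t t1_usec = host_time_usec();
    uint64_t tsc1    = __rdtsc();
    uint64_t clock_freq = (tsc1 - tsc0) * US_IN_S / (t1_usec - t0_usec);

    /* fast path: derive the current time from the TSC alone (calc_time() equivalent) */
    uint64_t dtsc      = __rdtsc() - tsc0;
    uint64_t fast_usec = t0_usec + dtsc * US_IN_S / clock_freq;

    printf("estimated TSC frequency: %lu Hz\n", (unsigned long)clock_freq);
    printf("fast clock: %lu us, host clock: %lu us\n",
           (unsigned long)fast_usec, (unsigned long)host_time_usec());
    return 0;
}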
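The lockless scheme from the "Thread safety" section boils down to packing the entire shared state (state machine state, active timepoint index, transition flag) into a single 16-bit word, so one compare-and-swap both elects the thread allowed to transition and publishes the result. Below is a standalone sketch of that pattern, assuming GCC/Clang __atomic builtins; the names are hypothetical and this is not the module's API.

/* descriptor CAS sketch (hypothetical names) */
#include <stdbool.h>
#include <stdint.h>

typedef union {
    struct {
        uint16_t state           : 4;
        uint16_t timepoint_index : 1;
        uint16_t _pad            : 10;
        uint16_t state_changing  : 1;
    };
    uint16_t word;
} demo_desc;

/* Try to become the single thread allowed to perform a state transition. */
bool demo_try_acquire_transition(demo_desc* atomic_desc, demo_desc seen) {
    if (seen.state_changing)
        return false;   /* another thread is already transitioning */
    demo_desc guarded = seen;
    guarded.state_changing = 1;
    return __atomic_compare_exchange_n(&atomic_desc->word, &seen.word, guarded.word,
                                       /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

/* Publish the next state; the RELEASE store makes the freshly written timepoint data visible
 * before any reader can observe the new descriptor. */
void demo_publish(demo_desc* atomic_desc, demo_desc seen, uint16_t new_state, bool flip_timepoint) {
    demo_desc next = {
        .state = new_state,
        .timepoint_index = flip_timepoint ? !seen.timepoint_index : seen.timepoint_index,
        .state_changing = 0,
    };
    __atomic_store_n(&atomic_desc->word, next.word, __ATOMIC_RELEASE);
}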