From 9ef75ebc18ab9b390e7c248d6068237d433a4b20 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Wed, 26 Oct 2022 07:21:13 -0700 Subject: [PATCH] [PAL/Linux-SGX] Add Invariant TSC query fallback for hypervisors Some hypervisors (like QEMU with KVM) do not expose CPUID leaves 0x15 and 0x16 (Core Crystal Clock/Processor Frequency). Instead, hypervisor-specific synthetic CPUID leaf 0x40000010 shows TSC frequency. Unfortunately, leaf 0x40000010 is not standardized, and some other hypervisor (e.g. MS Hyper-V) could use this leaf for something other than TSC frequency. To work around this, we check the `hypervisor_id` value in leaf 0x40000000, and only use 0x40000010 if the value is "KVMKVMKVM" (that's how QEMU with KVM identifies itself) or "VMwareVMware". To date, we know that VMware, QEMU/KVM and Cloud Hypervisor/KVM expose this TSC-frequency leaf 0x40000010. MS Hyper-V does not expose this leaf. We don't know about other hypervisors. Note that QEMU must start the VM with CPU flags `+invtsc,+vmware-cpuid-freq` to expose required CPUID leaves. 
Signed-off-by: Dmitrii Kuvaiskii --- common/include/arch/x86_64/cpu.h | 2 + pal/src/host/linux-sgx/pal_linux.h | 2 - pal/src/host/linux-sgx/pal_main.c | 11 ++ pal/src/host/linux-sgx/pal_misc.c | 156 ++++++++++++++++++++--------- 4 files changed, 124 insertions(+), 47 deletions(-) diff --git a/common/include/arch/x86_64/cpu.h b/common/include/arch/x86_64/cpu.h index 8eaeef1c01..52a912b55f 100644 --- a/common/include/arch/x86_64/cpu.h +++ b/common/include/arch/x86_64/cpu.h @@ -44,6 +44,8 @@ enum extended_state_sub_leaf { #define PROC_FREQ_LEAF 0x16 #define AMX_TILE_INFO_LEAF 0x1D #define AMX_TMUL_INFO_LEAF 0x1E +#define HYPERVISOR_INFO_LEAF 0x40000000 +#define HYPERVISOR_VMWARE_TIME_LEAF 0x40000010 #define MAX_INPUT_EXT_VALUE_LEAF 0x80000000 #define EXT_SIGNATURE_AND_FEATURES_LEAF 0x80000001 #define CPU_BRAND_LEAF 0x80000002 diff --git a/pal/src/host/linux-sgx/pal_linux.h b/pal/src/host/linux-sgx/pal_linux.h index 643b3533bd..35b523f594 100644 --- a/pal/src/host/linux-sgx/pal_linux.h +++ b/pal/src/host/linux-sgx/pal_linux.h @@ -101,8 +101,6 @@ void _PalExceptionHandler(unsigned int exit_info, sgx_cpu_context_t* uc, * its underlying type. 
*/ void _PalHandleExternalEvent(long event_, sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state); -bool is_tsc_usable(void); -uint64_t get_tsc_hz(void); void init_tsc(void); int init_cpuid(void); diff --git a/pal/src/host/linux-sgx/pal_main.c b/pal/src/host/linux-sgx/pal_main.c index 7bc9f5d8db..a444a2541e 100644 --- a/pal/src/host/linux-sgx/pal_main.c +++ b/pal/src/host/linux-sgx/pal_main.c @@ -382,6 +382,7 @@ static int import_and_init_extra_runtime_domain_names(struct pal_dns_host_conf* extern void* g_enclave_base; extern void* g_enclave_top; extern bool g_allowed_files_warn; +extern uint64_t g_tsc_hz; static int print_warnings_on_insecure_configs(PAL_HANDLE parent_process) { int ret; @@ -519,11 +520,21 @@ static int print_warnings_on_insecure_configs(PAL_HANDLE parent_process) { return ret; } +static void print_warning_on_invariant_tsc(PAL_HANDLE parent_process) { + if (!parent_process && !g_tsc_hz) { + /* Warn only in the first process. */ + log_warning("Could not set up Invariant TSC (CPU is too old or you run on a VM that does " + "not expose corresponding CPUID leaves). 
This degrades performance."); + } +} + static void post_callback(void) { if (print_warnings_on_insecure_configs(g_pal_common_state.parent_process) < 0) { log_error("Cannot parse the manifest (while checking for insecure configurations)"); ocall_exit(1, /*is_exitgroup=*/true); } + + print_warning_on_invariant_tsc(g_pal_common_state.parent_process); } __attribute_no_sanitize_address diff --git a/pal/src/host/linux-sgx/pal_misc.c b/pal/src/host/linux-sgx/pal_misc.c index da92d2c9ed..d86a1b21e8 100644 --- a/pal/src/host/linux-sgx/pal_misc.c +++ b/pal/src/host/linux-sgx/pal_misc.c @@ -34,13 +34,112 @@ static uint64_t g_start_tsc = 0; static uint64_t g_start_usec = 0; static seqlock_t g_tsc_lock = INIT_SEQLOCK_UNLOCKED; -/** - * Initialize the data structures used for date/time emulation using TSC - */ -void init_tsc(void) { - if (is_tsc_usable()) { - g_tsc_hz = get_tsc_hz(); +static bool is_tsc_usable(void) { + uint32_t words[CPUID_WORD_NUM]; + _PalCpuIdRetrieve(INVARIANT_TSC_LEAF, 0, words); + return words[CPUID_WORD_EDX] & (1 << 8); +} + +/* return TSC frequency or 0 if invariant TSC is not supported */ +static uint64_t get_tsc_hz_baremetal(void) { + uint32_t words[CPUID_WORD_NUM]; + + /* + * Based on "Time Stamp Counter and Nominal Core Crystal Clock Information" leaf, calculate TSC + * frequency as ECX * EBX / EAX, where + * - EAX is denominator of the TSC/"core crystal clock" ratio, + * - EBX is numerator of the TSC/"core crystal clock" ratio, + * - ECX is core crystal clock (nominal) frequency in Hz. 
+ */ + _PalCpuIdRetrieve(TSC_FREQ_LEAF, 0, words); + if (!words[CPUID_WORD_EAX] || !words[CPUID_WORD_EBX]) { + /* TSC/core crystal clock ratio is not enumerated, can't use RDTSC for accurate time */ + return 0; + } + + if (words[CPUID_WORD_ECX] > 0) { + /* cast to 64-bit first to prevent integer overflow */ + return (uint64_t)words[CPUID_WORD_ECX] * words[CPUID_WORD_EBX] / words[CPUID_WORD_EAX]; + } + + /* some Intel CPUs do not report nominal frequency of crystal clock, let's calculate it + * based on Processor Frequency Information Leaf (CPUID 16H); this leaf always exists if + * TSC Frequency Leaf exists; logic is taken from Linux 5.11's arch/x86/kernel/tsc.c */ + _PalCpuIdRetrieve(PROC_FREQ_LEAF, 0, words); + if (!words[CPUID_WORD_EAX]) { + /* processor base frequency (in MHz) is not enumerated, can't calculate frequency */ + return 0; + } + + /* processor base frequency is in MHz but we need to return TSC frequency in Hz; cast to 64-bit + * first to prevent integer overflow */ + return (uint64_t)words[CPUID_WORD_EAX] * 1000000; +} + +/* return TSC frequency or 0 if invariant TSC is not supported */ +static uint64_t get_tsc_hz_hypervisor(void) { + uint32_t words[CPUID_WORD_NUM]; + + /* + * We rely on the Generic CPUID space for hypervisors: + * - 0x40000000: EAX: The maximum input value for CPUID supported by the hypervisor + * - EBX, ECX, EDX: Hypervisor vendor ID signature (hypervisor_id) + * + * If we detect QEMU/KVM or Cloud Hypervisor/KVM (hypervisor_id = "KVMKVMKVM") or VMWare + * ("VMwareVMware"), then we assume that leaf 0x40000010 contains virtual TSC frequency in kHz + * in EAX. We check hypervisor_id because leaf 0x40000010 is not standardized and e.g. Microsoft + * Hyper-V may use it for other purposes. 
+ * + * Relevant materials: + * - https://github.com/qemu/qemu/commit/9954a1582e18b03ddb66f6c892dccf2c3508f4b2 + * - qemu/target/i386/cpu.h, qemu/target/i386/cpu.c, qemu/target/i386/kvm/kvm.c sources + * - https://github.com/freebsd/freebsd-src/blob/9df6eea/sys/x86/x86/identcpu.c#L1372-L1377 (for + * the list of hypervisor_id values) + */ + _PalCpuIdRetrieve(HYPERVISOR_INFO_LEAF, 0, words); + + bool is_kvm = words[CPUID_WORD_EBX] == 0x4b4d564b + && words[CPUID_WORD_ECX] == 0x564b4d56 + && words[CPUID_WORD_EDX] == 0x0000004d; + bool is_vmware = words[CPUID_WORD_EBX] == 0x61774d56 + && words[CPUID_WORD_ECX] == 0x4d566572 + && words[CPUID_WORD_EDX] == 0x65726177; + + if (!is_kvm && !is_vmware) { + /* not a hypervisor that contains "virtual TSC frequency" in leaf 0x40000010 */ + return 0; + } + + if (words[CPUID_WORD_EAX] < HYPERVISOR_VMWARE_TIME_LEAF) { + /* virtual TSC frequency is not available */ + return 0; + } + + _PalCpuIdRetrieve(HYPERVISOR_VMWARE_TIME_LEAF, 0, words); + if (!words[CPUID_WORD_EAX]) { + /* TSC frequency (in kHz) is not enumerated, can't calculate frequency */ + return 0; } + + /* TSC frequency is in kHz but we need to return TSC frequency in Hz; cast to 64-bit first to + * prevent integer overflow */ + return (uint64_t)words[CPUID_WORD_EAX] * 1000; +} + +/* initialize the data structures used for date/time emulation using TSC */ +void init_tsc(void) { + if (!is_tsc_usable()) + return; + + g_tsc_hz = get_tsc_hz_baremetal(); + if (g_tsc_hz) + return; + + /* hypervisors may not expose crystal-clock frequency CPUID leaves, so instead try + * hypervisor-special synthetic CPUID leaf 0x40000010 (VMWare-style Timing Information) */ + g_tsc_hz = get_tsc_hz_hypervisor(); + if (g_tsc_hz) + return; } int _PalSystemTimeQuery(uint64_t* out_usec) { @@ -413,8 +512,13 @@ static const struct cpuid_leaf cpuid_known_leaves[] = { {.leaf = 0x1F, .zero_subleaf = false, .cache = false}, /* Intel V2 Ext Topology Enumeration */ /* basic CPUID leaf functions end here */ + 
/* hypervisor-specific CPUID leaf functions (0x40000000 - 0x400000FF) start here */ + {.leaf = 0x40000000, .zero_subleaf = true, .cache = true}, /* CPUID Info */ + {.leaf = 0x40000010, .zero_subleaf = true, .cache = true}, /* VMWare-style Timing Info */ + /* NOTE: currently only the above two leaves are used, see also get_tsc_hz_hypervisor() */ + /* invalid CPUID leaf functions (no existing or future CPU will return any meaningful - * information in these leaves) occupy 40000000 - 4FFFFFFFH -- they are treated the same as + * information in these leaves) occupy 0x40000100 - 0x4FFFFFFF -- they are treated the same as * unrecognized leaves, see code below */ /* extended CPUID leaf functions start here */ @@ -672,44 +776,6 @@ ssize_t read_file_buffer(const char* filename, char* buf, size_t buf_size) { return n; } -bool is_tsc_usable(void) { - uint32_t words[CPUID_WORD_NUM]; - _PalCpuIdRetrieve(INVARIANT_TSC_LEAF, 0, words); - return words[CPUID_WORD_EDX] & 1 << 8; -} - -/* return TSC frequency or 0 if invariant TSC is not supported */ -uint64_t get_tsc_hz(void) { - uint32_t words[CPUID_WORD_NUM]; - - _PalCpuIdRetrieve(TSC_FREQ_LEAF, 0, words); - if (!words[CPUID_WORD_EAX] || !words[CPUID_WORD_EBX]) { - /* TSC/core crystal clock ratio is not enumerated, can't use RDTSC for accurate time */ - return 0; - } - - if (words[CPUID_WORD_ECX] > 0) { - /* calculate TSC frequency as core crystal clock frequency (EAX) * EBX / EAX; cast to 64-bit - * first to prevent integer overflow */ - uint64_t ecx_hz = words[CPUID_WORD_ECX]; - return ecx_hz * words[CPUID_WORD_EBX] / words[CPUID_WORD_EAX]; - } - - /* some Intel CPUs do not report nominal frequency of crystal clock, let's calculate it - * based on Processor Frequency Information Leaf (CPUID 16H); this leaf always exists if - * TSC Frequency Leaf exists; logic is taken from Linux 5.11's arch/x86/kernel/tsc.c */ - _PalCpuIdRetrieve(PROC_FREQ_LEAF, 0, words); - if (!words[CPUID_WORD_EAX]) { - /* processor base frequency (in MHz) is 
not enumerated, can't calculate frequency */ - return 0; - } - - /* processor base frequency is in MHz but we need to return TSC frequency in Hz; cast to 64-bit - * first to prevent integer overflow */ - uint64_t base_frequency_mhz = words[CPUID_WORD_EAX]; - return base_frequency_mhz * 1000000; -} - int _PalRandomBitsRead(void* buffer, size_t size) { uint32_t rand; for (size_t i = 0; i < size; i += sizeof(rand)) {