Skip to content

Commit

Permalink
[PAL/Linux-SGX] Add Invariant TSC query fallback for hypervisors
Browse files Browse the repository at this point in the history
Some hypervisors (like QEMU with KVM) do not expose CPUID leaves 0x15
and 0x16 (Core Crystal Clock/Processor Frequency). Instead, the
hypervisor-specific synthetic CPUID leaf 0x40000010 reports the TSC frequency.

Unfortunately, leaf 0x40000010 is not standardized, and some other
hypervisors (e.g. MS Hyper-V) could use this leaf for something
other than the TSC frequency. To work around this, we check the
`hypervisor_id` value in leaf 0x40000000, and only use 0x40000010 if the
value is "KVMKVMKVM" (that's how QEMU with KVM identifies itself) or
"VMwareVMware". To date, we know that VMware, QEMU/KVM and Cloud
Hypervisor/KVM expose this TSC-frequency leaf 0x40000010. MS Hyper-V
does not expose this leaf. We don't know about other hypervisors.

Note that QEMU must start the VM with CPU flags
`+invtsc,+vmware-cpuid-freq` to expose required CPUID leaves.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
Dmitrii Kuvaiskii committed Aug 3, 2023
1 parent 92636ae commit 9ef75eb
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 47 deletions.
2 changes: 2 additions & 0 deletions common/include/arch/x86_64/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ enum extended_state_sub_leaf {
#define PROC_FREQ_LEAF 0x16
#define AMX_TILE_INFO_LEAF 0x1D
#define AMX_TMUL_INFO_LEAF 0x1E
#define HYPERVISOR_INFO_LEAF 0x40000000
#define HYPERVISOR_VMWARE_TIME_LEAF 0x40000010
#define MAX_INPUT_EXT_VALUE_LEAF 0x80000000
#define EXT_SIGNATURE_AND_FEATURES_LEAF 0x80000001
#define CPU_BRAND_LEAF 0x80000002
Expand Down
2 changes: 0 additions & 2 deletions pal/src/host/linux-sgx/pal_linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ void _PalExceptionHandler(unsigned int exit_info, sgx_cpu_context_t* uc,
* its underlying type. */
void _PalHandleExternalEvent(long event_, sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state);

bool is_tsc_usable(void);
uint64_t get_tsc_hz(void);
void init_tsc(void);

int init_cpuid(void);
Expand Down
11 changes: 11 additions & 0 deletions pal/src/host/linux-sgx/pal_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ static int import_and_init_extra_runtime_domain_names(struct pal_dns_host_conf*
extern void* g_enclave_base;
extern void* g_enclave_top;
extern bool g_allowed_files_warn;
extern uint64_t g_tsc_hz;

static int print_warnings_on_insecure_configs(PAL_HANDLE parent_process) {
int ret;
Expand Down Expand Up @@ -519,11 +520,21 @@ static int print_warnings_on_insecure_configs(PAL_HANDLE parent_process) {
return ret;
}

/* Log a warning when no TSC frequency is available (`g_tsc_hz` is zero). Processes that have a
 * parent (non-NULL `parent_process`) stay silent, so the message appears only in the first
 * process. */
static void print_warning_on_invariant_tsc(PAL_HANDLE parent_process) {
    if (parent_process || g_tsc_hz) {
        /* either this is not the first process, or the TSC frequency is known */
        return;
    }

    log_warning("Could not set up Invariant TSC (CPU is too old or you run on a VM that does "
                "not expose corresponding CPUID leaves). This degrades performance.");
}

/* Print start-up warnings: first the ones about insecure configurations (if that check reports an
 * error, log it and call ocall_exit() with exit code 1), then the one about Invariant TSC. */
static void post_callback(void) {
    int ret = print_warnings_on_insecure_configs(g_pal_common_state.parent_process);
    if (ret < 0) {
        log_error("Cannot parse the manifest (while checking for insecure configurations)");
        ocall_exit(1, /*is_exitgroup=*/true);
    }

    print_warning_on_invariant_tsc(g_pal_common_state.parent_process);
}

__attribute_no_sanitize_address
Expand Down
156 changes: 111 additions & 45 deletions pal/src/host/linux-sgx/pal_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,112 @@ static uint64_t g_start_tsc = 0;
static uint64_t g_start_usec = 0;
static seqlock_t g_tsc_lock = INIT_SEQLOCK_UNLOCKED;

/**
* Initialize the data structures used for date/time emulation using TSC
*/
void init_tsc(void) {
if (is_tsc_usable()) {
g_tsc_hz = get_tsc_hz();
/* Return true iff bit 8 of EDX in CPUID leaf INVARIANT_TSC_LEAF is set; init_tsc() does not set
 * up TSC-based time when this returns false. */
static bool is_tsc_usable(void) {
    const uint32_t invariant_tsc_bit = 1u << 8;

    uint32_t regs[CPUID_WORD_NUM];
    _PalCpuIdRetrieve(INVARIANT_TSC_LEAF, 0, regs);

    return (regs[CPUID_WORD_EDX] & invariant_tsc_bit) != 0;
}

/* Derive the TSC frequency (in Hz) from CPUID leaves TSC_FREQ_LEAF and PROC_FREQ_LEAF; returns 0
 * if these leaves do not enumerate enough information to compute it. */
static uint64_t get_tsc_hz_baremetal(void) {
    uint32_t regs[CPUID_WORD_NUM];

    /*
     * "Time Stamp Counter and Nominal Core Crystal Clock Information" leaf reports:
     *   - EAX: denominator of the TSC/"core crystal clock" ratio,
     *   - EBX: numerator of the TSC/"core crystal clock" ratio,
     *   - ECX: core crystal clock (nominal) frequency in Hz,
     * so that TSC frequency = ECX * EBX / EAX.
     */
    _PalCpuIdRetrieve(TSC_FREQ_LEAF, 0, regs);
    uint32_t ratio_denominator = regs[CPUID_WORD_EAX];
    uint32_t ratio_numerator   = regs[CPUID_WORD_EBX];
    uint32_t crystal_hz        = regs[CPUID_WORD_ECX];

    if (ratio_denominator == 0 || ratio_numerator == 0) {
        /* the ratio is not enumerated, so RDTSC can't be used for accurate time */
        return 0;
    }

    if (crystal_hz != 0) {
        /* widen to 64 bits before multiplying so that the product cannot overflow */
        uint64_t scaled_hz = (uint64_t)crystal_hz * ratio_numerator;
        return scaled_hz / ratio_denominator;
    }

    /* Some Intel CPUs do not report the nominal crystal clock frequency; fall back to the
     * Processor Frequency Information Leaf (CPUID 16H), which always exists if the TSC Frequency
     * Leaf exists. Logic is taken from Linux 5.11's arch/x86/kernel/tsc.c. */
    _PalCpuIdRetrieve(PROC_FREQ_LEAF, 0, regs);
    uint32_t base_mhz = regs[CPUID_WORD_EAX];
    if (base_mhz == 0) {
        /* processor base frequency (in MHz) is not enumerated either */
        return 0;
    }

    /* MHz -> Hz, computed in 64 bits to avoid overflow */
    return (uint64_t)base_mhz * 1000000;
}

/* Obtain the TSC frequency (in Hz) from the hypervisor-specific CPUID leaf 0x40000010; returns 0
 * if the hypervisor is not recognized or does not enumerate the frequency. */
static uint64_t get_tsc_hz_hypervisor(void) {
    uint32_t regs[CPUID_WORD_NUM];

    /*
     * Generic CPUID space for hypervisors, leaf 0x40000000:
     *   - EAX: the maximum input value for CPUID supported by the hypervisor,
     *   - EBX, ECX, EDX: hypervisor vendor ID signature (hypervisor_id).
     *
     * Leaf 0x40000010 is not standardized (e.g. Microsoft Hyper-V may use it for other purposes),
     * so it is trusted to hold the virtual TSC frequency in kHz in EAX only when hypervisor_id is
     * "KVMKVMKVM" (QEMU/KVM, Cloud Hypervisor/KVM) or "VMwareVMware" (VMWare).
     *
     * Relevant materials:
     * - https://github.com/qemu/qemu/commit/9954a1582e18b03ddb66f6c892dccf2c3508f4b2
     * - qemu/target/i386/cpu.h, qemu/target/i386/cpu.c, qemu/target/i386/kvm/kvm.c sources
     * - https://github.com/freebsd/freebsd-src/blob/9df6eea/sys/x86/x86/identcpu.c#L1372-L1377
     *   (for the list of hypervisor_id values)
     */
    _PalCpuIdRetrieve(HYPERVISOR_INFO_LEAF, 0, regs);
    uint32_t max_hypervisor_leaf = regs[CPUID_WORD_EAX];
    uint32_t id_ebx = regs[CPUID_WORD_EBX];
    uint32_t id_ecx = regs[CPUID_WORD_ECX];
    uint32_t id_edx = regs[CPUID_WORD_EDX];

    /* "KVMK" "VMKV" "M\0\0\0" */
    bool is_kvm = id_ebx == 0x4b4d564b && id_ecx == 0x564b4d56 && id_edx == 0x0000004d;
    /* "VMwa" "reVM" "ware" */
    bool is_vmware = id_ebx == 0x61774d56 && id_ecx == 0x4d566572 && id_edx == 0x65726177;

    if (!(is_kvm || is_vmware)) {
        /* not a hypervisor known to keep "virtual TSC frequency" in leaf 0x40000010 */
        return 0;
    }

    if (max_hypervisor_leaf < HYPERVISOR_VMWARE_TIME_LEAF) {
        /* the hypervisor does not provide the timing leaf at all */
        return 0;
    }

    _PalCpuIdRetrieve(HYPERVISOR_VMWARE_TIME_LEAF, 0, regs);
    uint32_t tsc_khz = regs[CPUID_WORD_EAX];
    if (tsc_khz == 0) {
        /* TSC frequency (in kHz) is not enumerated */
        return 0;
    }

    /* kHz -> Hz, computed in 64 bits to avoid overflow */
    return (uint64_t)tsc_khz * 1000;
}

/* Initialize `g_tsc_hz`, which is used for date/time emulation via TSC. The value is left
 * untouched if is_tsc_usable() reports false, and ends up zero if neither frequency source below
 * yields a result. */
void init_tsc(void) {
    if (is_tsc_usable()) {
        g_tsc_hz = get_tsc_hz_baremetal();
        if (!g_tsc_hz) {
            /* hypervisors may not expose crystal-clock frequency CPUID leaves, so fall back to
             * the hypervisor-specific synthetic CPUID leaf 0x40000010 (VMWare-style Timing
             * Information) */
            g_tsc_hz = get_tsc_hz_hypervisor();
        }
    }
}

int _PalSystemTimeQuery(uint64_t* out_usec) {
Expand Down Expand Up @@ -413,8 +512,13 @@ static const struct cpuid_leaf cpuid_known_leaves[] = {
{.leaf = 0x1F, .zero_subleaf = false, .cache = false}, /* Intel V2 Ext Topology Enumeration */
/* basic CPUID leaf functions end here */

/* hypervisor-specific CPUID leaf functions (0x40000000 - 0x400000FF) start here */
{.leaf = 0x40000000, .zero_subleaf = true, .cache = true}, /* CPUID Info */
{.leaf = 0x40000010, .zero_subleaf = true, .cache = true}, /* VMWare-style Timing Info */
/* NOTE: currently only the above two leaves are used, see also get_tsc_hz_hypervisor() */

/* invalid CPUID leaf functions (no existing or future CPU will return any meaningful
* information in these leaves) occupy 40000000 - 4FFFFFFFH -- they are treated the same as
* information in these leaves) occupy 0x40000100 - 0x4FFFFFFF -- they are treated the same as
* unrecognized leaves, see code below */

/* extended CPUID leaf functions start here */
Expand Down Expand Up @@ -672,44 +776,6 @@ ssize_t read_file_buffer(const char* filename, char* buf, size_t buf_size) {
return n;
}

/* Return true iff bit 8 of EDX in CPUID leaf INVARIANT_TSC_LEAF is set. */
bool is_tsc_usable(void) {
    uint32_t regs[CPUID_WORD_NUM];
    _PalCpuIdRetrieve(INVARIANT_TSC_LEAF, 0, regs);

    uint32_t edx = regs[CPUID_WORD_EDX];
    return (edx >> 8) & 1;
}

/* Derive the TSC frequency (in Hz) from CPUID leaves TSC_FREQ_LEAF and PROC_FREQ_LEAF; returns 0
 * if these leaves do not enumerate enough information to compute it. */
uint64_t get_tsc_hz(void) {
    uint32_t regs[CPUID_WORD_NUM];

    _PalCpuIdRetrieve(TSC_FREQ_LEAF, 0, regs);
    const uint32_t denominator = regs[CPUID_WORD_EAX];
    const uint32_t numerator   = regs[CPUID_WORD_EBX];
    const uint32_t crystal_hz  = regs[CPUID_WORD_ECX];

    if (denominator == 0 || numerator == 0) {
        /* TSC/core crystal clock ratio is not enumerated, can't use RDTSC for accurate time */
        return 0;
    }

    if (crystal_hz != 0) {
        /* TSC frequency is core crystal clock frequency (ECX) * EBX / EAX; the cast widens to
         * 64 bits before multiplying to prevent integer overflow */
        return (uint64_t)crystal_hz * numerator / denominator;
    }

    /* Some Intel CPUs do not report the nominal crystal clock frequency; fall back to the
     * Processor Frequency Information Leaf (CPUID 16H), which always exists if the TSC Frequency
     * Leaf exists. Logic is taken from Linux 5.11's arch/x86/kernel/tsc.c. */
    _PalCpuIdRetrieve(PROC_FREQ_LEAF, 0, regs);
    const uint32_t base_mhz = regs[CPUID_WORD_EAX];
    if (base_mhz == 0) {
        /* processor base frequency (in MHz) is not enumerated, can't calculate frequency */
        return 0;
    }

    /* MHz -> Hz, computed in 64 bits to prevent integer overflow */
    return (uint64_t)base_mhz * 1000000;
}

int _PalRandomBitsRead(void* buffer, size_t size) {
uint32_t rand;
for (size_t i = 0; i < size; i += sizeof(rand)) {
Expand Down

0 comments on commit 9ef75eb

Please sign in to comment.