From bc5e138e99717b933e567ca9ae057ead498878cf Mon Sep 17 00:00:00 2001
From: Hector Martin
Date: Thu, 11 Apr 2024 09:51:20 +0900
Subject: [PATCH 1/5] prctl: Introduce PR_{SET,GET}_MEM_MODEL

On some architectures, it is possible to query and/or change the CPU
memory model. This allows userspace to switch to a stricter memory
model for performance reasons, such as when emulating code for another
architecture where that model is the default.

Introduce two prctls to allow userspace to query and set the memory
model for a thread. Two models are initially defined:

- PR_SET_MEM_MODEL_DEFAULT requests the default memory model for the
  architecture.
- PR_SET_MEM_MODEL_TSO requests the x86 TSO memory model.

PR_SET_MEM_MODEL is allowed to set a stricter memory model than
requested if available, in which case it will return successfully. If
the requested memory model cannot be fulfilled, it will return an
error. The memory model that was actually set can be queried by a
subsequent call to PR_GET_MEM_MODEL.

Examples:
- On a CPU with no support for a memory model at least as strong as
  TSO, PR_SET_MEM_MODEL(PR_SET_MEM_MODEL_TSO) fails.
- On a CPU with runtime-configurable TSO support, PR_SET_MEM_MODEL can
  toggle the memory model between DEFAULT and TSO at will.
- On a CPU where the only memory model is at least as strict as TSO,
  PR_GET_MEM_MODEL will return PR_SET_MEM_MODEL_DEFAULT, and
  PR_SET_MEM_MODEL(PR_SET_MEM_MODEL_TSO) will return success but leave
  the memory model at PR_SET_MEM_MODEL_DEFAULT. This implies that the
  default is in fact at least as strict as TSO.

Signed-off-by: Hector Martin
Reviewed-by: Neal Gompa
---
 include/linux/memory_ordering_model.h | 11 +++++++++++
 include/uapi/linux/prctl.h            |  5 +++++
 kernel/sys.c                          | 21 +++++++++++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100644 include/linux/memory_ordering_model.h

diff --git a/include/linux/memory_ordering_model.h b/include/linux/memory_ordering_model.h
new file mode 100644
index 00000000000000..267a12ca66307e
--- /dev/null
+++ b/include/linux/memory_ordering_model.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MEMORY_ORDERING_MODEL_H
+#define __ASM_MEMORY_ORDERING_MODEL_H
+
+/* Arch hooks to implement the PR_{SET,GET}_MEM_MODEL prctls */
+
+struct task_struct;
+int arch_prctl_mem_model_get(struct task_struct *t);
+int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val);
+
+#endif
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 370ed14b1ae092..961216093f11ab 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -306,4 +306,9 @@ struct prctl_mm_map {
 # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK	0xc
 # define PR_RISCV_V_VSTATE_CTRL_MASK		0x1f
 
+#define PR_GET_MEM_MODEL	0x6d4d444c
+#define PR_SET_MEM_MODEL	0x4d4d444c
+# define PR_SET_MEM_MODEL_DEFAULT	0
+# define PR_SET_MEM_MODEL_TSO		1
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 8bb106a56b3a5f..0cf327d7c1002a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include <linux/memory_ordering_model.h>
 #include
 #include
@@ -2445,6 +2446,16 @@ static int prctl_get_auxv(void __user *addr, unsigned long len)
 	return sizeof(mm->saved_auxv);
 }
 
+int __weak arch_prctl_mem_model_get(struct task_struct *t)
+{
+	return -EINVAL;
+}
+
+int __weak arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+{
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2760,6 +2771,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_RISCV_V_GET_CONTROL:
 		error = RISCV_V_GET_CONTROL();
 		break;
+	case PR_GET_MEM_MODEL:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_mem_model_get(me);
+		break;
+	case PR_SET_MEM_MODEL:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_mem_model_set(me, arg2);
+		break;
 	default:
 		error = -EINVAL;
 		break;
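For illustration, a minimal sketch of how a userspace consumer might drive
this interface. It is not part of the patch; it assumes only the uapi
constants defined above, and the fallback policy is the caller's choice:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_MEM_MODEL                /* constants from this patch */
    # define PR_GET_MEM_MODEL         0x6d4d444c
    # define PR_SET_MEM_MODEL         0x4d4d444c
    # define PR_SET_MEM_MODEL_DEFAULT 0
    # define PR_SET_MEM_MODEL_TSO     1
    #endif

    int main(void)
    {
        /* Unused arguments must be zero, or the kernel returns -EINVAL. */
        if (prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0) == 0)
            /* May legitimately report DEFAULT on an always-TSO CPU. */
            printf("model now: %d\n", prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0));
        else
            /* No TSO available; an emulator falls back to barriers. */
            printf("TSO not available\n");
        return 0;
    }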
From f1eb4fc271aec23a9714b5694ced7308828ebefe Mon Sep 17 00:00:00 2001
From: Hector Martin
Date: Thu, 11 Apr 2024 09:51:21 +0900
Subject: [PATCH 2/5] arm64: Implement PR_{GET,SET}_MEM_MODEL for always-TSO
 CPUs

Some ARM64 implementations are known to always use the TSO memory
model. Add trivial support for the PR_{GET,SET}_MEM_MODEL prctl, which
allows userspace to learn this fact.

Known TSO implementations:
- Nvidia Denver
- Nvidia Carmel
- Fujitsu A64FX

Signed-off-by: Hector Martin
Reviewed-by: Neal Gompa
---
 arch/arm64/Kconfig                    |  9 +++++++
 arch/arm64/include/asm/cpufeature.h   |  4 +++
 arch/arm64/kernel/Makefile            |  3 ++-
 arch/arm64/kernel/cpufeature.c        | 11 ++++----
 arch/arm64/kernel/cpufeature_impdef.c | 38 +++++++++++++++++++++++++++
 arch/arm64/kernel/process.c           | 24 +++++++++++++++++
 arch/arm64/tools/cpucaps              |  1 +
 7 files changed, 84 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/kernel/cpufeature_impdef.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84bf..f8e66fe44ff408 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2162,6 +2162,15 @@ config ARM64_DEBUG_PRIORITY_MASKING
 	  If unsure, say N
 
 endif # ARM64_PSEUDO_NMI
 
+config ARM64_MEMORY_MODEL_CONTROL
+	bool "Runtime memory model control"
+	help
+	  Some ARM64 CPUs support runtime switching of the CPU memory
+	  model, which can be useful to emulate other CPU architectures
+	  that have different memory models. Say Y to enable support
+	  for the PR_SET_MEM_MODEL/PR_GET_MEM_MODEL prctl() calls on
+	  CPUs with this feature.
+
 config RELOCATABLE
 	bool "Build a relocatable kernel image" if EXPERT
 	select ARCH_HAS_RELR
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8b904a757bd341..fb215b0e7529dd 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -1032,6 +1032,10 @@ static inline bool cpu_has_lpa2(void)
 #endif
 }
 
+void __init init_cpucap_indirect_list_impdef(void);
+void __init init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps);
+bool cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry);
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 763824963ed157..f0471d0ab85355 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,7 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
 			   return_address.o cpuinfo.o cpu_errata.o \
 			   cpufeature.o alternative.o cacheinfo.o \
 			   smp.o smp_spin_table.o topology.o smccc-call.o \
-			   syscall.o proton-pack.o idle.o patching.o pi/
+			   syscall.o proton-pack.o idle.o patching.o \
+			   cpufeature_impdef.o pi/
 obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
 			   sys_compat.o
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 56583677c1f294..e39ab93ad68355 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1028,7 +1028,7 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
 extern const struct arm64_cpu_capabilities arm64_errata[];
 static const struct arm64_cpu_capabilities arm64_features[];
 
-static void __init
+void __init
 init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
 {
 	for (; caps->matches; caps++) {
@@ -1540,8 +1540,8 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
 	return true;
 }
 
-static bool
-feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
+bool
+cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
 	int val, min, max;
 	u64 tmp;
@@ -1594,14 +1594,14 @@ has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
 	if (!mask)
 		return false;
 
-	return feature_matches(val, entry);
+	return cpufeature_matches(val, entry);
 }
 
 static bool
 has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
 {
 	u64 val = read_scoped_sysreg(entry, scope);
 
-	return feature_matches(val, entry);
+	return cpufeature_matches(val, entry);
 }
 
 const struct cpumask *system_32bit_el0_cpumask(void)
@@ -3486,6 +3486,7 @@ void __init setup_boot_cpu_features(void)
 	 * handle the boot CPU.
 	 */
 	init_cpucap_indirect_list();
+	init_cpucap_indirect_list_impdef();
 
 	/*
 	 * Detect broken pseudo-NMI. Must be called _before_ the call to
diff --git a/arch/arm64/kernel/cpufeature_impdef.c b/arch/arm64/kernel/cpufeature_impdef.c
new file mode 100644
index 00000000000000..bb04a8e3d79d85
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature_impdef.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Contains implementation-defined CPU feature definitions.
+ */
+
+#include <asm/cpufeature.h>
+
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	/* List of CPUs that always use the TSO memory model */
+	static const struct midr_range fixed_tso_list[] = {
+		MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
+		MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+		MIDR_ALL_VERSIONS(MIDR_FUJITSU_A64FX),
+		{ /* sentinel */ }
+	};
+
+	return is_midr_in_range_list(read_cpuid_id(), fixed_tso_list);
+}
+#endif
+
+static const struct arm64_cpu_capabilities arm64_impdef_features[] = {
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+	{
+		.desc = "TSO memory model (Fixed)",
+		.capability = ARM64_HAS_TSO_FIXED,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_tso_fixed,
+	},
+#endif
+	{},
+};
+
+void __init init_cpucap_indirect_list_impdef(void)
+{
+	init_cpucap_indirect_list_from_array(arm64_impdef_features);
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4ae31b7af6c311..7920056bad3ed1 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include <linux/memory_ordering_model.h>
 #include
 #include
@@ -513,6 +514,25 @@ void update_sctlr_el1(u64 sctlr)
 	isb();
 }
 
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+int arch_prctl_mem_model_get(struct task_struct *t)
+{
+	return PR_SET_MEM_MODEL_DEFAULT;
+}
+
+int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+{
+	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_FIXED) &&
+	    val == PR_SET_MEM_MODEL_TSO)
+		return 0;
+
+	if (val == PR_SET_MEM_MODEL_DEFAULT)
+		return 0;
+
+	return -EINVAL;
+}
+#endif
+
 /*
  * Thread switching.
  */
@@ -651,6 +671,10 @@ void arch_setup_new_exec(void)
 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
 					 PR_SPEC_ENABLE);
 	}
+
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+	arch_prctl_mem_model_set(current, PR_SET_MEM_MODEL_DEFAULT);
+#endif
 }
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 62b2838a231ada..daa6b94954028d 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -52,6 +52,7 @@ HAS_STAGE2_FWB
 HAS_TCR2
 HAS_TIDCP1
 HAS_TLB_RANGE
+HAS_TSO_FIXED
 HAS_VA52
 HAS_VIRT_HOST_EXTN
 HAS_WFXT
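Combined with the prctl semantics from patch 1, the MIDR list above produces
the third behavior described there: PR_SET_MEM_MODEL(TSO) succeeds on
Denver, Carmel, and A64FX, but PR_GET_MEM_MODEL keeps reporting DEFAULT
because nothing was actually switched. A hypothetical probe, reusing the
patch 1 constants, that tells the three cases apart:

    /* Returns 0: no TSO, 1: always-TSO (this patch), 2: switchable TSO. */
    static int probe_tso(void)
    {
        if (prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0) != 0)
            return 0;                       /* request refused */
        if (prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0) == PR_SET_MEM_MODEL_DEFAULT)
            return 1;                       /* default already >= TSO */
        prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_DEFAULT, 0, 0, 0);
        return 2;                           /* TSO was really enabled */
    }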
From 8bde8f667f979d69507793af007103500659a0ea Mon Sep 17 00:00:00 2001
From: Hector Martin
Date: Thu, 11 Apr 2024 09:51:22 +0900
Subject: [PATCH 3/5] arm64: Introduce scaffolding to add ACTLR_EL1 to thread
 state

Some CPUs expose IMPDEF features in ACTLR_EL1 that can be meaningfully
controlled per-thread (like TSO control on Apple cores). Add the basic
scaffolding to save/restore this register as part of context switching.

This mechanism is disabled by default, both by a config symbol and by a
runtime check, which ensures it is never triggered unless the system is
known to need it for some feature (which also implies that the layout
of ACTLR_EL1 is uniform between all CPU core types).

Signed-off-by: Hector Martin
Reviewed-by: Neal Gompa
---
 arch/arm64/Kconfig                  |  3 +++
 arch/arm64/include/asm/cpufeature.h |  5 +++++
 arch/arm64/include/asm/processor.h  |  3 +++
 arch/arm64/kernel/process.c         | 25 +++++++++++++++++++++
 arch/arm64/kernel/setup.c           |  8 ++++++++
 5 files changed, 44 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f8e66fe44ff408..9b3593b34ccecf 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -408,6 +408,9 @@ config KASAN_SHADOW_OFFSET
 config UNWIND_TABLES
 	bool
 
+config ARM64_ACTLR_STATE
+	bool
+
 source "arch/arm64/Kconfig.platforms"
 
 menu "Kernel Features"
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index fb215b0e7529dd..46ab37f8f4d882 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -909,6 +909,11 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 	return 8;
 }
 
+static __always_inline bool system_has_actlr_state(void)
+{
+	return false;
+}
+
 s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
 
 struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index f77371232d8c6d..d43c5791a35e22 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -184,6 +184,9 @@ struct thread_struct {
 	u64			sctlr_user;
 	u64			svcr;
 	u64			tpidr2_el0;
+#ifdef CONFIG_ARM64_ACTLR_STATE
+	u64			actlr;
+#endif
 };
 
 static inline unsigned int thread_get_vl(struct thread_struct *thread,
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7920056bad3ed1..117f80e16aac87 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -372,6 +372,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		if (system_supports_tpidr2())
 			p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
 
+#ifdef CONFIG_ARM64_ACTLR_STATE
+		if (system_has_actlr_state())
+			p->thread.actlr = read_sysreg(actlr_el1);
+#endif
+
 		if (stack_start) {
 			if (is_compat_thread(task_thread_info(p)))
 				childregs->compat_sp = stack_start;
@@ -533,6 +538,25 @@ int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
 }
 #endif
 
+#ifdef CONFIG_ARM64_ACTLR_STATE
+/*
+ * IMPDEF control register ACTLR_EL1 handling. Some CPUs use this to
+ * expose features that can be controlled by userspace.
+ */
+static void actlr_thread_switch(struct task_struct *next)
+{
+	if (!system_has_actlr_state())
+		return;
+
+	current->thread.actlr = read_sysreg(actlr_el1);
+	write_sysreg(next->thread.actlr, actlr_el1);
+}
+#else
+static inline void actlr_thread_switch(struct task_struct *next)
+{
+}
+#endif
+
 /*
  * Thread switching.
  */
@@ -550,6 +574,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	ssbs_thread_switch(next);
 	erratum_1418040_thread_switch(next);
 	ptrauth_thread_switch_user(next);
+	actlr_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 65a052bf741f04..35342f633a8508 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -359,6 +359,14 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 	 */
 	init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 #endif
+#ifdef CONFIG_ARM64_ACTLR_STATE
+	/* Store the boot CPU ACTLR_EL1 value as the default. This will
+	 * only actually be restored during context switching if the
+	 * platform is known to use ACTLR_EL1 for exposable features and
+	 * its layout is known to be the same on all CPUs.
+	 */
+	init_task.thread.actlr = read_sysreg(actlr_el1);
+#endif
 
 	if (boot_args[1] || boot_args[2] || boot_args[3]) {
 		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
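The ordering in actlr_thread_switch() is the load-bearing detail: the live
ACTLR_EL1 value is saved into the outgoing thread before the incoming
thread's copy is written back, so a change made mid-timeslice (such as the
prctl() added in the next patch) survives preemption. A condensed model,
with a plain variable standing in for the system register:

    struct thread_state { unsigned long actlr; };

    static unsigned long hw_actlr;      /* stand-in for ACTLR_EL1 */

    static void actlr_switch(struct thread_state *prev,
                             struct thread_state *next)
    {
        prev->actlr = hw_actlr;         /* save:    read_sysreg(actlr_el1)  */
        hw_actlr = next->actlr;         /* restore: write_sysreg(actlr_el1) */
    }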
From c1c120a4fcefa339591770e05eb471cc12efc793 Mon Sep 17 00:00:00 2001
From: Hector Martin
Date: Thu, 11 Apr 2024 09:51:23 +0900
Subject: [PATCH 4/5] arm64: Implement Apple IMPDEF TSO memory model control

Apple CPUs may implement the TSO memory model as an optional
configurable mode. This allows x86 emulators to simplify their
load/store handling, greatly increasing performance.

Expose this via the prctl PR_SET_MEM_MODEL_TSO mechanism. We use the
Apple IMPDEF AIDR_EL1 register to check for the availability of TSO
mode, and enable this codepath on all CPUs with an Apple implementer.

This relies on the ACTLR_EL1 thread state scaffolding introduced
earlier.

Signed-off-by: Hector Martin
Reviewed-by: Neal Gompa
---
 arch/arm64/Kconfig                        |  2 ++
 arch/arm64/include/asm/apple_cpufeature.h | 15 +++++++++++++++
 arch/arm64/include/asm/cpufeature.h       |  3 ++-
 arch/arm64/kernel/cpufeature_impdef.c     | 23 +++++++++++++++++++++
 arch/arm64/kernel/process.c               | 22 ++++++++++++++++++++++
 arch/arm64/tools/cpucaps                  |  1 +
 6 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/apple_cpufeature.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9b3593b34ccecf..2f3eedd955c94b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2167,6 +2167,8 @@ endif # ARM64_PSEUDO_NMI
 
 config ARM64_MEMORY_MODEL_CONTROL
 	bool "Runtime memory model control"
+	default ARCH_APPLE
+	select ARM64_ACTLR_STATE
 	help
 	  Some ARM64 CPUs support runtime switching of the CPU memory
 	  model, which can be useful to emulate other CPU architectures
diff --git a/arch/arm64/include/asm/apple_cpufeature.h b/arch/arm64/include/asm/apple_cpufeature.h
new file mode 100644
index 00000000000000..4370d91ffa3ec9
--- /dev/null
+++ b/arch/arm64/include/asm/apple_cpufeature.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_APPLE_CPUFEATURES_H
+#define __ASM_APPLE_CPUFEATURES_H
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+#define AIDR_APPLE_TSO_SHIFT	9
+#define AIDR_APPLE_TSO		BIT(9)
+
+#define ACTLR_APPLE_TSO_SHIFT	1
+#define ACTLR_APPLE_TSO		BIT(1)
+
+#endif
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 46ab37f8f4d882..a191000d88c285 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -911,7 +911,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 
 static __always_inline bool system_has_actlr_state(void)
 {
-	return false;
+	return IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
+		alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE);
 }
 
 s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
diff --git a/arch/arm64/kernel/cpufeature_impdef.c b/arch/arm64/kernel/cpufeature_impdef.c
index bb04a8e3d79d85..9325d1eb12f402 100644
--- a/arch/arm64/kernel/cpufeature_impdef.c
+++ b/arch/arm64/kernel/cpufeature_impdef.c
@@ -4,8 +4,21 @@
  */
 
 #include <asm/cpufeature.h>
+#include <asm/apple_cpufeature.h>
 
 #ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+static bool has_apple_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	u64 val;
+	WARN_ON(scope != SCOPE_SYSTEM);
+
+	if (read_cpuid_implementor() != ARM_CPU_IMP_APPLE)
+		return false;
+
+	val = read_sysreg(aidr_el1);
+	return cpufeature_matches(val, entry);
+}
+
 static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
 {
 	/* List of CPUs that always use the TSO memory model */
@@ -22,6 +35,16 @@ static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
 
 static const struct arm64_cpu_capabilities arm64_impdef_features[] = {
 #ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+	{
+		.desc = "TSO memory model (Apple)",
+		.capability = ARM64_HAS_TSO_APPLE,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_apple_feature,
+		.field_pos = AIDR_APPLE_TSO_SHIFT,
+		.field_width = 1,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 1,
+	},
 	{
 		.desc = "TSO memory model (Fixed)",
 		.capability = ARM64_HAS_TSO_FIXED,
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 117f80e16aac87..34a19ecfb630ba 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -44,6 +44,7 @@
 #include
 #include
+#include <asm/apple_cpufeature.h>
 #include
 #include
 #include
@@ -522,6 +523,10 @@ void update_sctlr_el1(u64 sctlr)
 #ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
 int arch_prctl_mem_model_get(struct task_struct *t)
 {
+	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE) &&
+	    t->thread.actlr & ACTLR_APPLE_TSO)
+		return PR_SET_MEM_MODEL_TSO;
+
 	return PR_SET_MEM_MODEL_DEFAULT;
 }
 
@@ -531,6 +536,23 @@ int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
 	    val == PR_SET_MEM_MODEL_TSO)
 		return 0;
 
+	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE)) {
+		WARN_ON(!system_has_actlr_state());
+
+		switch (val) {
+		case PR_SET_MEM_MODEL_TSO:
+			t->thread.actlr |= ACTLR_APPLE_TSO;
+			break;
+		case PR_SET_MEM_MODEL_DEFAULT:
+			t->thread.actlr &= ~ACTLR_APPLE_TSO;
+			break;
+		default:
+			return -EINVAL;
+		}
+		write_sysreg(t->thread.actlr, actlr_el1);
+		return 0;
+	}
+
 	if (val == PR_SET_MEM_MODEL_DEFAULT)
 		return 0;
 
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index daa6b94954028d..62f9ca9ce44b41 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -52,6 +52,7 @@ HAS_STAGE2_FWB
 HAS_TCR2
 HAS_TIDCP1
 HAS_TLB_RANGE
+HAS_TSO_APPLE
 HAS_TSO_FIXED
 HAS_VA52
 HAS_VIRT_HOST_EXTN
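Note that the control is genuinely per-thread: it lives in thread.actlr and
in the calling thread's live ACTLR_EL1, so an emulator must request TSO on
every thread that runs guest code. A fork/clone inherits the setting via
copy_thread(), while execve() resets it in arch_setup_new_exec(). A
hypothetical worker thread, where run_guest_code() is only a placeholder
for the emulator loop:

    #include <pthread.h>
    #include <sys/prctl.h>

    void run_guest_code(void *vcpu, int have_tso);  /* placeholder */

    static void *emu_worker(void *vcpu)
    {
        /* Affects only the calling thread's ACTLR_EL1.TSO bit. */
        int have_tso = !prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO,
                              0, 0, 0);

        run_guest_code(vcpu, have_tso);
        return NULL;
    }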
From 54feb6cfd5cbf2f2a3fe20da2b793612f2bfefde Mon Sep 17 00:00:00 2001
From: Asahi Lina
Date: Sat, 25 May 2024 20:22:29 +0900
Subject: [PATCH 5/5] KVM: arm64: Expose TSO capability to guests and context
 switch

Signed-off-by: Asahi Lina
---
 arch/arm64/include/asm/kvm_emulate.h       | 3 +++
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 975af30af31fa2..78df70c55abc5c 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -72,6 +72,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 	if (has_vhe() || has_hvhe())
 		vcpu->arch.hcr_el2 |= HCR_E2H;
+	if (IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
+	    alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE))
+		vcpu->arch.hcr_el2 &= ~HCR_TACR;
 	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
 		/* route synchronous external abort exceptions to EL2 */
 		vcpu->arch.hcr_el2 |= HCR_TEA;
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 4be6a7fa007082..7f1de448d2ae00 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -16,6 +16,8 @@
 #include
 #include
 
+#define SYS_IMP_APL_ACTLR_EL12	sys_reg(3, 6, 15, 14, 6)
+
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
@@ -88,6 +90,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
 	ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
 	ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+	if (IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
+	    alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE))
+		ctxt_sys_reg(ctxt, ACTLR_EL1) = read_sysreg_s(SYS_IMP_APL_ACTLR_EL12);
 }
 
 static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
@@ -156,6 +161,10 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
 	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
 
+	if (IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
+	    alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE))
+		write_sysreg_s(ctxt_sys_reg(ctxt, ACTLR_EL1), SYS_IMP_APL_ACTLR_EL12);
+
 	if (ctxt_has_mte(ctxt)) {
 		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
 		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
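Taken together, the series gives code running under the prctl (or under a
guest kernel that forwards the same feature) x86-like store ordering.
Concretely, under TSO the classic message-passing idiom below is correct
with no barrier instructions at all, while under the default Arm memory
model the reader can observe flag == 1 and still read data == 0 unless
explicit barriers or acquire/release accesses are added. A sketch only;
volatile constrains the compiler, the selected memory model constrains
the CPU:

    volatile int data, flag;

    void writer(void)               /* thread A */
    {
        data = 42;
        flag = 1;                   /* TSO: stores become visible in order */
    }

    int reader(void)                /* thread B */
    {
        while (!flag)
            ;                       /* TSO: loads are not reordered either */
        return data;                /* guaranteed to be 42 under TSO */
    }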