LoongArch: Improve hardware page table walker
LoongArch has problems similar to those explained in commit 7f0b1bf
("arm64: Fix barriers used for page table modifications"): when the
hardware page table walker (PTW) is enabled, speculative accesses may
cause spurious page faults in kernel space. Theoretically, to completely
avoid spurious page faults we need a "dbar + ibar" pair between the page
table modifications and the subsequent memory accesses using the
corresponding virtual address. But "ibar" is too heavy for performance,
so we only use a "dbar 0b11000" in set_pte(), and let spurious_fault()
filter out the rare remaining spurious page faults that the "ibar" would
have avoided.

Besides, we replace the LL/SC loop in set_pte() with a single amo
instruction, which has better performance, and adjust
switch_mm_irqs_off() to avoid branch instructions between the writes of
CSR.ASID and CSR.PGDL.

Signed-off-by: Huacai Chen <[email protected]>
chenhuacai committed Sep 5, 2024
1 parent 01f6b6a commit 894cf4e
Showing 4 changed files with 58 additions and 21 deletions.
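
For readers following the barrier argument in the commit message above, here is a rough sketch of the two synchronization options it weighs. The helper names are illustrative and not part of this commit; only the instruction sequences come from the message and the set_pte() hunk below (hint 24 == 0b11000, the "o_wrw" ordering hint).

/*
 * Theoretically complete but too heavy: order the page table store and
 * then flush the pipeline so no speculative access can use a stale walk.
 */
static inline void ptw_sync_heavy(void)
{
	__asm__ __volatile__("dbar 0 \n\t ibar 0" : : : "memory");
}

/*
 * What this commit actually does: only a write-vs-read/write ordering
 * barrier in set_pte(); the rare speculative accesses it cannot cover
 * are tolerated and later filtered out by spurious_fault() instead of
 * being prevented up front.
 */
static inline void ptw_sync_light(void)
{
	__asm__ __volatile__("dbar 24" : : : "memory");	/* 24 == 0b11000 (o_wrw) */
}
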
2 changes: 2 additions & 0 deletions arch/loongarch/include/asm/atomic.h
@@ -15,13 +15,15 @@
#define __LL "ll.w "
#define __SC "sc.w "
#define __AMADD "amadd.w "
#define __AMOR "amor.w "
#define __AMAND_DB "amand_db.w "
#define __AMOR_DB "amor_db.w "
#define __AMXOR_DB "amxor_db.w "
#elif __SIZEOF_LONG__ == 8
#define __LL "ll.d "
#define __SC "sc.d "
#define __AMADD "amadd.d "
#define __AMOR "amor.d "
#define __AMAND_DB "amand_db.d "
#define __AMOR_DB "amor_db.d "
#define __AMXOR_DB "amxor_db.d "
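
A brief illustration of how the new __AMOR macro is meant to be used; the helper name is hypothetical, while the operand constraints mirror the set_pte() hunk further down:

#include <linux/atomic.h>	/* __AMOR is picked up via asm/atomic.h */

/*
 * Atomically OR a mask into a native-width word. Targeting $zero
 * discards the old value, so no LL/SC retry loop is needed.
 */
static inline void __amo_or(unsigned long *p, unsigned long mask)
{
	__asm__ __volatile__(
	__AMOR "$zero, %[mask], %[mem]	\n"
	: [mem] "+ZB" (*p)
	: [mask] "r" (mask)
	: "memory");
}
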
4 changes: 2 additions & 2 deletions arch/loongarch/include/asm/mmu_context.h
@@ -83,13 +83,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
if (!asid_valid(next, cpu))
get_new_mmu_context(next, cpu);

write_csr_asid(cpu_asid(cpu, next));

if (next != &init_mm)
csr_write64((unsigned long)next->pgd, LOONGARCH_CSR_PGDL);
else
csr_write64((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL);

write_csr_asid(cpu_asid(cpu, next));

/*
* Mark current->active_mm as not "active" anymore.
* We don't want to mislead possible IPI tlb flush routines.
32 changes: 13 additions & 19 deletions arch/loongarch/include/asm/pgtable.h
@@ -331,29 +331,23 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
* Make sure the buddy is global too (if it's !none,
* it better already be global)
*/
if (pte_none(ptep_get(buddy))) {
#ifdef CONFIG_SMP
/*
* For SMP, multiple CPUs can race, so we need to do
* this atomically.
*/
unsigned long page_global = _PAGE_GLOBAL;
unsigned long tmp;

__asm__ __volatile__ (
"1:" __LL "%[tmp], %[buddy] \n"
" bnez %[tmp], 2f \n"
" or %[tmp], %[tmp], %[global] \n"
__SC "%[tmp], %[buddy] \n"
" beqz %[tmp], 1b \n"
" nop \n"
"2: \n"
__WEAK_LLSC_MB
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
: [global] "r" (page_global));
/*
* For SMP, multiple CPUs can race, so we need
* to do this atomically.
*/
__asm__ __volatile__(
__AMOR "$zero, %[global], %[buddy] \n"
: [buddy] "+ZB" (buddy->pte)
: [global] "r" (_PAGE_GLOBAL)
: "memory");

DBAR(0b11000); /* o_wrw = 0b11000 */
#else /* !CONFIG_SMP */
if (pte_none(ptep_get(buddy)))
WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
#endif /* CONFIG_SMP */
}
}
}

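For orientation, here is how the whole set_pte() body should read once this hunk is applied, reconstructed from the changed lines above plus the unchanged surrounding code; treat it as a sketch rather than a verbatim copy of the file:

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	WRITE_ONCE(*ptep, pteval);

	if (pte_val(pteval) & _PAGE_GLOBAL) {
		pte_t *buddy = ptep_buddy(ptep);
		/*
		 * Make sure the buddy is global too (if it's !none,
		 * it better already be global)
		 */
#ifdef CONFIG_SMP
		/*
		 * For SMP, multiple CPUs can race, so we need
		 * to do this atomically.
		 */
		__asm__ __volatile__(
		__AMOR "$zero, %[global], %[buddy] \n"
		: [buddy] "+ZB" (buddy->pte)
		: [global] "r" (_PAGE_GLOBAL)
		: "memory");

		DBAR(0b11000); /* o_wrw = 0b11000 */
#else /* !CONFIG_SMP */
		if (pte_none(ptep_get(buddy)))
			WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
#endif /* CONFIG_SMP */
	}
}
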
41 changes: 41 additions & 0 deletions arch/loongarch/mm/fault.c
@@ -31,11 +31,52 @@

int show_unhandled_signals = 1;

static int __kprobes spurious_fault(unsigned long write, unsigned long address)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;

if (!(address & __UA_LIMIT))
return 0;

pgd = pgd_offset_k(address);
if (!pgd_present(pgdp_get(pgd)))
return 0;

p4d = p4d_offset(pgd, address);
if (!p4d_present(p4dp_get(p4d)))
return 0;

pud = pud_offset(p4d, address);
if (!pud_present(pudp_get(pud)))
return 0;

pmd = pmd_offset(pud, address);
if (!pmd_present(pmdp_get(pmd)))
return 0;

if (pmd_leaf(*pmd)) {
return write ? pmd_write(pmdp_get(pmd)) : 1;
} else {
pte = pte_offset_kernel(pmd, address);
if (!pte_present(ptep_get(pte)))
return 0;

return write ? pte_write(ptep_get(pte)) : 1;
}
}

static void __kprobes no_context(struct pt_regs *regs,
unsigned long write, unsigned long address)
{
const int field = sizeof(unsigned long) * 2;

if (spurious_fault(write, address))
return;

/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
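
The decision made at the leaf level of spurious_fault() can be summed up in a few lines; the helper below is purely illustrative (not part of the patch) and just restates the `write ? pte_write(...) : 1` logic:

#include <linux/types.h>

/*
 * Illustrative only: a fault on a present mapping is spurious for reads
 * unconditionally, and for writes only if the mapping is writable;
 * otherwise it is a genuine fault that must be handled (or oopsed on).
 */
static bool leaf_fault_is_spurious(bool present, bool writable, bool write)
{
	if (!present)
		return false;

	return write ? writable : true;
}
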
