author		Will Deacon <will@kernel.org>	2019-08-22 12:22:14 -0400
committer	Will Deacon <will@kernel.org>	2019-08-27 12:38:36 -0400
commit		42f91093b043332ad75cea7aeafecda6fe81814c (patch)
tree		664521734b32a8f876461ce30017046b57faa505
parent		e8620cff99946ea1f7891d7bec071a23a1fdaef3 (diff)
arm64: mm: Ignore spurious translation faults taken from the kernel
Thanks to address translation being performed out of order with respect
to loads and stores, it is possible for a CPU to take a translation
fault when accessing a page that was mapped by a different CPU.

For example, in the case that one CPU maps a page and then sets a flag
to tell another CPU:

	CPU 0
	-----

	MOV	X0, <valid pte>
	STR	X0, [Xptep]	// Store new PTE to page table
	DSB	ISHST
	ISB
	MOV	X1, #1
	STR	X1, [Xflag]	// Set the flag

	CPU 1
	-----

loop:	LDAR	X0, [Xflag]	// Poll flag with Acquire semantics
	CBZ	X0, loop
	LDR	X1, [X2]	// Translates using the new PTE

then the final load on CPU 1 can raise a translation fault because the
translation can be performed speculatively before the read of the flag
and marked as "faulting" by the CPU. This isn't quite as bad as it
sounds since, in reality, code such as:

	CPU 0				CPU 1
	-----				-----
	spin_lock(&lock);		spin_lock(&lock);
	*ptr = vmalloc(size);		if (*ptr)
	spin_unlock(&lock);			foo = **ptr;
					spin_unlock(&lock);

will not trigger the fault because there is an address dependency on
CPU 1 which prevents the speculative translation. However, more exotic
code where the virtual address is known ahead of time, such as:

	CPU 0				CPU 1
	-----				-----
	spin_lock(&lock);		spin_lock(&lock);
	set_fixmap(0, paddr, prot);	if (mapped)
	mapped = true;				foo = *fix_to_virt(0);
	spin_unlock(&lock);		spin_unlock(&lock);

could fault. This can be avoided by any of:

  * Introducing broadcast TLB maintenance on the map path

  * Adding a DSB;ISB sequence after checking a flag which indicates
    that a virtual address is now mapped

  * Handling the spurious fault

Given that we have never observed a problem due to this under Linux and
future revisions of the architecture are being tightened so that
translation table walks are effectively ordered in the same way as
explicit memory accesses, we no longer treat spurious kernel faults as
fatal if an AT instruction indicates that the access does not trigger a
translation fault.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
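[Editor's illustration] The second mitigation listed in the message can be
sketched in C against the fixmap example. This is a minimal, hypothetical
sketch rather than code from this patch: the mapped flag, the slot index 0
and the read_fixmap_slot() helper are assumptions for illustration, while
smp_load_acquire(), dsb(ish), isb() and fix_to_virt() are existing arm64
kernel primitives:

	#include <linux/compiler.h>
	#include <asm/barrier.h>
	#include <asm/fixmap.h>

	static bool mapped;	/* assumed flag: set by CPU 0 after set_fixmap() */

	static int read_fixmap_slot(void)
	{
		if (!smp_load_acquire(&mapped))
			return -EAGAIN;

		/*
		 * DSB waits for any translation table walk begun
		 * speculatively before the flag was observed; ISB then
		 * discards stale speculative state, so the load below
		 * translates using the PTE written by CPU 0.
		 */
		dsb(ish);
		isb();

		return *(int *)fix_to_virt(0);
	}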
-rw-r--r--	arch/arm64/mm/fault.c	33
1 file changed, 33 insertions(+), 0 deletions(-)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index cfd65b63f36f..9808da29a653 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/bitfield.h>
 #include <linux/extable.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
@@ -242,6 +243,34 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
 	return false;
 }
 
+static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
+							unsigned int esr,
+							struct pt_regs *regs)
+{
+	unsigned long flags;
+	u64 par, dfsc;
+
+	if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+	    (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
+		return false;
+
+	local_irq_save(flags);
+	asm volatile("at s1e1r, %0" :: "r" (addr));
+	isb();
+	par = read_sysreg(par_el1);
+	local_irq_restore(flags);
+
+	/* If the AT translated successfully, the original fault was spurious */
+	if (!(par & SYS_PAR_EL1_F))
+		return true;
+
+	/*
+	 * If we got a different type of fault from the AT instruction,
+	 * treat the translation fault as spurious.
+	 */
+	dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
+	return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
+}
+
 static void die_kernel_fault(const char *msg, unsigned long addr,
 			     unsigned int esr, struct pt_regs *regs)
 {
@@ -270,6 +299,10 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 		return;
 
+	if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
+	    "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
+		return;
+
 	if (is_el1_permission_fault(addr, esr, regs)) {
 		if (esr & ESR_ELx_WNR)
 			msg = "write to read-only memory";
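[Editor's illustration] For reference, the PAR_EL1 decode performed by
is_spurious_el1_translation_fault() can be exercised standalone. A minimal
sketch, assuming local PAR_F/PAR_FST/FSC_* macros that mirror SYS_PAR_EL1_F
and SYS_PAR_EL1_FST (bits [6:1], added by the parent commit) and
ESR_ELx_FSC_TYPE/ESR_ELx_FSC_FAULT from asm/esr.h; the helper name
par_fault_is_not_translation() is hypothetical:

	#include <linux/bitfield.h>
	#include <linux/bits.h>
	#include <linux/types.h>

	#define PAR_F		BIT(0)		/* mirrors SYS_PAR_EL1_F: AT faulted */
	#define PAR_FST		GENMASK(6, 1)	/* mirrors SYS_PAR_EL1_FST */
	#define FSC_TYPE	0x3c		/* mirrors ESR_ELx_FSC_TYPE */
	#define FSC_FAULT	0x04		/* mirrors ESR_ELx_FSC_FAULT */

	/* True if PAR_EL1 reports anything other than a translation fault. */
	static bool par_fault_is_not_translation(u64 par)
	{
		u64 fst;

		if (!(par & PAR_F))
			return true;	/* AT translated successfully */

		/* FIELD_GET() masks PAR_FST out of par and shifts it to bit 0 */
		fst = FIELD_GET(PAR_FST, par);
		return (fst & FSC_TYPE) != FSC_FAULT;
	}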