author	Paolo Bonzini <pbonzini@redhat.com>	2015-11-24 13:34:40 -0500
committer	Paolo Bonzini <pbonzini@redhat.com>	2015-11-24 13:34:40 -0500
commit	8bd142c01648cdb33e9bcafa0448ba2c20ed814c (patch)
tree	9197c60d3f9d4036f38f281a183e94750ceea1d7 /arch/arm64
parent	d792abacaf1a1a8dfea353fab699b97fa6251c2a (diff)
parent	fbb4574ce9a37e15a9872860bf202f2be5bdf6c4 (diff)
Merge tag 'kvm-arm-for-v4.4-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master

KVM/ARM fixes for v4.4-rc3: some timer fixes, properly unmapping PTEs, an errata fix, and two tweaks to the EL2 panic code.
Diffstat (limited to 'arch/arm64')
-rw-r--r--	arch/arm64/Kconfig	21
-rw-r--r--	arch/arm64/crypto/aes-ce-cipher.c	2
-rw-r--r--	arch/arm64/include/asm/barrier.h	16
-rw-r--r--	arch/arm64/include/asm/compat.h	3
-rw-r--r--	arch/arm64/include/asm/cpufeature.h	3
-rw-r--r--	arch/arm64/include/asm/dma-mapping.h	13
-rw-r--r--	arch/arm64/include/asm/kvm_emulate.h	8
-rw-r--r--	arch/arm64/include/asm/mmu_context.h	2
-rw-r--r--	arch/arm64/include/asm/pgtable.h	1
-rw-r--r--	arch/arm64/kernel/cpu_errata.c	9
-rw-r--r--	arch/arm64/kernel/cpuinfo.c	5
-rw-r--r--	arch/arm64/kernel/efi.c	14
-rw-r--r--	arch/arm64/kernel/suspend.c	10
-rw-r--r--	arch/arm64/kvm/hyp.S	14
-rw-r--r--	arch/arm64/kvm/inject_fault.c	2
-rw-r--r--	arch/arm64/mm/dma-mapping.c	35
-rw-r--r--	arch/arm64/mm/mmu.c	14
-rw-r--r--	arch/arm64/net/bpf_jit_comp.c	48
18 files changed, 152 insertions(+), 68 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9ac16a482ff1..e55848c1edf4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -316,6 +316,27 @@ config ARM64_ERRATUM_832075
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_834220
+	bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault"
+	depends on KVM
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 834220 on Cortex-A57 parts up to r1p2.
+
+	  Affected Cortex-A57 parts might report a Stage 2 translation
+	  fault as the result of a Stage 1 fault for a load crossing a
+	  page boundary when there is a permission or device memory
+	  alignment fault at Stage 1 and a translation fault at Stage 2.
+
+	  The workaround is to verify that the Stage 1 translation
+	  doesn't generate a fault before handling the Stage 2 fault.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_845719
 	bool "Cortex-A53: 845719: a load might read incorrect data"
 	depends on COMPAT
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index ce47792a983d..f7bd9bf0bbb3 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -237,7 +237,7 @@ EXPORT_SYMBOL(ce_aes_setkey);
 static struct crypto_alg aes_alg = {
 	.cra_name		= "aes",
 	.cra_driver_name	= "aes-ce",
-	.cra_priority		= 300,
+	.cra_priority		= 250,
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 624f9679f4b0..9622eb48f894 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -64,27 +64,31 @@ do { \
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1;						\
+	union { typeof(*p) __val; char __c[1]; } __u;			\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
 	case 1:								\
 		asm volatile ("ldarb %w0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u8 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	case 2:								\
 		asm volatile ("ldarh %w0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u16 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	case 4:								\
 		asm volatile ("ldar %w0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u32 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	case 8:								\
 		asm volatile ("ldar %0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u64 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	}								\
-	___p1;								\
+	__u.__val;							\
 })
 
 #define read_barrier_depends()		do { } while(0)
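
A note on the union trick above: with the old form, "typeof(*p) ___p1" inherits any const qualifier from the argument, and a const local cannot be used as an asm output operand; the union object itself is never const, so writing through its char[] member always works, and reading .__val returns the right type. A minimal AArch64-only sketch of the 4-byte case (editor's illustration, not the kernel macro):

	#include <stdint.h>

	static inline uint32_t load_acquire32(const uint32_t *p)
	{
		union { uint32_t val; char c[4]; } u;	/* u itself is not const */

		asm volatile("ldar %w0, %1"
			     : "=r" (*(uint32_t *)u.c)	/* write via non-const lvalue */
			     : "Q" (*p)
			     : "memory");
		return u.val;
	}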
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 7fbed6919b54..eb8432bb82b8 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -23,7 +23,6 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
-#include <linux/ptrace.h>
 
 #define COMPAT_USER_HZ		100
 #ifdef __AARCH64EB__
@@ -234,7 +233,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
+#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
 
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 11d5bb0fdd54..52722ee73dba 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -29,8 +29,9 @@
 #define ARM64_HAS_PAN				4
 #define ARM64_HAS_LSE_ATOMICS			5
 #define ARM64_WORKAROUND_CAVIUM_23154		6
+#define ARM64_WORKAROUND_834220			7
 
-#define ARM64_NCAPS				7
+#define ARM64_NCAPS				8
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 54d0ead41afc..61e08f360e31 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -18,7 +18,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/acpi.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 
@@ -26,22 +25,16 @@
 #include <asm/xen/hypervisor.h>
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
-extern struct dma_map_ops *dma_ops;
 extern struct dma_map_ops dummy_dma_ops;
 
 static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
-	if (unlikely(!dev))
-		return dma_ops;
-	else if (dev->archdata.dma_ops)
+	if (dev && dev->archdata.dma_ops)
 		return dev->archdata.dma_ops;
-	else if (acpi_disabled)
-		return dma_ops;
 
 	/*
-	 * When ACPI is enabled, if arch_set_dma_ops is not called,
-	 * we will disable device DMA capability by setting it
-	 * to dummy_dma_ops.
+	 * We expect no ISA devices, and all other DMA masters are expected to
+	 * have someone call arch_setup_dma_ops at device creation time.
 	 */
 	return &dummy_dma_ops;
 }
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 17e92f05b1fe..3ca894ecf699 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -99,11 +99,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
 	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
 }
 
+/*
+ * vcpu_reg should always be passed a register number coming from a
+ * read of ESR_EL2. Otherwise, it may give the wrong result on AArch32
+ * with banked registers.
+ */
 static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
 {
-	if (vcpu_mode_is_32bit(vcpu))
-		return vcpu_reg32(vcpu, reg_num);
-
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
 }
 
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index c0e87898ba96..24165784b803 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -101,7 +101,7 @@ static inline void cpu_set_default_tcr_t0sz(void)
 #define destroy_context(mm)		do { } while(0)
 void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
-#define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
+#define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })
 
 /*
  * This is called when "tsk" is about to enter lazy TLB mode.
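
The added parentheses are classic macro hygiene: without them, passing an expression rather than a plain identifier misparses, because "->" binds tighter than unary "&". That is exactly what the efi.c hunk later in this series relies on when it calls init_new_context(NULL, &efi_mm). A standalone sketch (illustrative type names, not kernel code):

	struct ctx { long id; };
	struct mm  { struct ctx context; };

	#define BAD_ID(mm)	(&mm->context.id)	/* unhygienic */
	#define GOOD_ID(mm)	(&(mm)->context.id)	/* the fixed form */

	struct mm efi_mm;

	/* BAD_ID(&efi_mm) expands to &&efi_mm->context.id, which tries to
	 * apply "->" to the non-pointer efi_mm and fails to compile.
	 * GOOD_ID(&efi_mm) expands to &(&efi_mm)->context.id, as intended. */
	long *id = GOOD_ID(&efi_mm);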
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 9819a9426b69..7e074f93f383 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -81,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 24926f2504f7..feb6b4efa641 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -75,6 +75,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 			   (1 << MIDR_VARIANT_SHIFT) | 2),
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_834220
+	{
+	/* Cortex-A57 r0p0 - r1p2 */
+		.desc = "ARM erratum 834220",
+		.capability = ARM64_WORKAROUND_834220,
+		MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 2),
+	},
+#endif
 #ifdef CONFIG_ARM64_ERRATUM_845719
 	{
 	/* Cortex-A53 r0p[01234] */
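
For readers decoding the MIDR_RANGE() bounds: MIDR_EL1 encodes the variant ("rN") in bits [23:20] and the revision ("pM") in bits [3:0], so 0x00 means r0p0 and (1 << MIDR_VARIANT_SHIFT) | 2 means r1p2. A simplified sketch of the range check (editor's illustration, not the kernel's actual helper):

	#include <stdbool.h>
	#include <stdint.h>

	#define MIDR_VARIANT_SHIFT	20

	/* vr: the variant/revision bits already masked out of MIDR_EL1 */
	static bool midr_in_range(uint32_t vr, uint32_t min, uint32_t max)
	{
		return vr >= min && vr <= max;
	}

	/* affected = midr_in_range(vr, 0x00, (1u << MIDR_VARIANT_SHIFT) | 2); */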
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 706679d0a0b4..212ae6361d8b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -30,6 +30,7 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 
 /*
  * In case the boot CPU is hotpluggable, we record its initial state and
@@ -112,6 +113,10 @@ static int c_show(struct seq_file *m, void *v)
 	 */
 	seq_printf(m, "processor\t: %d\n", i);
 
+	seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+		   loops_per_jiffy / (500000UL/HZ),
+		   loops_per_jiffy / (5000UL/HZ) % 100);
+
 	/*
 	 * Dump out the common processor features in a single line.
 	 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
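
The restored BogoMIPS line is plain integer arithmetic on loops_per_jiffy: BogoMIPS = loops_per_jiffy * HZ / 500000, printed as an integer part and a two-digit fraction. A self-contained sketch with a made-up loops_per_jiffy value, assuming HZ=100 (editor's illustration):

	#include <stdio.h>

	#define HZ 100UL

	int main(void)
	{
		unsigned long loops_per_jiffy = 1234567UL;	/* example only */

		/* 1234567 / 5000 = 246; 1234567 / 50 % 100 = 91 -> "246.91" */
		printf("BogoMIPS\t: %lu.%02lu\n",
		       loops_per_jiffy / (500000UL / HZ),
		       loops_per_jiffy / (5000UL / HZ) % 100);
		return 0;
	}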
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index de46b50f4cdf..fc5508e0df57 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -224,6 +224,8 @@ static bool __init efi_virtmap_init(void)
 {
 	efi_memory_desc_t *md;
 
+	init_new_context(NULL, &efi_mm);
+
 	for_each_efi_memory_desc(&memmap, md) {
 		u64 paddr, npages, size;
 		pgprot_t prot;
@@ -254,7 +256,8 @@ static bool __init efi_virtmap_init(void)
 		else
 			prot = PAGE_KERNEL;
 
-		create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
+		create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size,
+				   __pgprot(pgprot_val(prot) | PTE_NG));
 	}
 	return true;
 }
@@ -329,14 +332,7 @@ core_initcall(arm64_dmi_init);
 
 static void efi_set_pgd(struct mm_struct *mm)
 {
-	if (mm == &init_mm)
-		cpu_set_reserved_ttbr0();
-	else
-		cpu_switch_mm(mm->pgd, mm);
-
-	local_flush_tlb_all();
-	if (icache_is_aivivt())
-		__local_flush_icache_all();
+	switch_mm(NULL, mm, NULL);
 }
 
 void efi_virtmap_load(void)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index fce95e17cf7f..1095aa483a1c 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -1,3 +1,4 @@
+#include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
@@ -71,6 +72,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	local_dbg_save(flags);
 
 	/*
+	 * Function graph tracer state gets inconsistent when the kernel
+	 * calls functions that never return (aka suspend finishers), hence
+	 * disable graph tracing during their execution.
+	 */
+	pause_graph_tracing();
+
+	/*
 	 * mm context saved on the stack, it will be restored when
 	 * the cpu comes out of reset through the identity mapped
 	 * page tables, so that the thread address space is properly
@@ -111,6 +119,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 			hw_breakpoint_restore(NULL);
 		}
 
+		unpause_graph_tracing();
+
 		/*
 		 * Restore pstate flags. OS lock and mdscr have been already
 		 * restored, so from this point onwards, debugging is fully
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 1599701ef044..86c289832272 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -864,6 +864,10 @@ ENTRY(__kvm_flush_vm_context)
 ENDPROC(__kvm_flush_vm_context)
 
 __kvm_hyp_panic:
+	// Stash PAR_EL1 before corrupting it in __restore_sysregs
+	mrs	x0, par_el1
+	push	x0, xzr
+
 	// Guess the context by looking at VTTBR:
 	// If zero, then we're already a host.
 	// Otherwise restore a minimal host context before panicing.
@@ -898,7 +902,7 @@ __kvm_hyp_panic:
 	mrs	x3, esr_el2
 	mrs	x4, far_el2
 	mrs	x5, hpfar_el2
-	mrs	x6, par_el1
+	pop	x6, xzr		// active context PAR_EL1
 	mrs	x7, tpidr_el2
 
 	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
@@ -914,7 +918,7 @@ __kvm_hyp_panic:
 ENDPROC(__kvm_hyp_panic)
 
 __hyp_panic_str:
-	.ascii	"HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+	.ascii	"HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0"
 
 	.align	2
 
@@ -1015,9 +1019,15 @@ el1_trap:
 	b.ne	1f			// Not an abort we care about
 
 	/* This is an abort. Check for permission fault */
+alternative_if_not ARM64_WORKAROUND_834220
 	and	x2, x1, #ESR_ELx_FSC_TYPE
 	cmp	x2, #FSC_PERM
 	b.ne	1f			// Not a permission fault
+alternative_else
+	nop				// Use the permission fault path to
+	nop				// check for a valid S1 translation,
+	nop				// regardless of the ESR value.
+alternative_endif
 
 	/*
 	 * Check for Stage-1 page table walk, which is guaranteed
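
How the alternative_if_not/alternative_else block above takes effect: the assembler emits the default sequence into .text and records the replacement plus the capability number in a table; early at boot the kernel walks that table and patches the default bytes wherever the CPU has the capability, so affected Cortex-A57 parts run the three-nop path. A conceptual sketch of that boot-time pass (simplified; the real arm64 code also fixes up branches and flushes the I-cache):

	#include <stdbool.h>
	#include <string.h>

	struct alt_entry {
		void *orig;		/* default instructions in .text */
		const void *alt;	/* replacement sequence */
		unsigned len;		/* same length for both */
		int capability;		/* e.g. ARM64_WORKAROUND_834220 */
	};

	static void apply_alternatives(struct alt_entry *tbl, unsigned n,
				       bool (*cpu_has)(int cap))
	{
		for (unsigned i = 0; i < n; i++)
			if (cpu_has(tbl[i].capability))
				memcpy(tbl[i].orig, tbl[i].alt, tbl[i].len);
	}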
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 85c57158dcd9..648112e90ed5 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -48,7 +48,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 
 	/* Note: These now point to the banked copies */
 	*vcpu_spsr(vcpu) = new_spsr_value;
-	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+	*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
 
 	/* Branch to exception vector */
 	if (sctlr & (1 << 13))
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 131a199114b4..7963aa4b5d28 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/genalloc.h>
@@ -28,9 +29,6 @@
 
 #include <asm/cacheflush.h>
 
-struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
 static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 				 bool coherent)
 {
@@ -515,13 +513,7 @@ EXPORT_SYMBOL(dummy_dma_ops);
 
 static int __init arm64_dma_init(void)
 {
-	int ret;
-
-	dma_ops = &swiotlb_dma_ops;
-
-	ret = atomic_pool_init();
-
-	return ret;
+	return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
 
@@ -552,10 +544,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 {
 	bool coherent = is_device_dma_coherent(dev);
 	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+	size_t iosize = size;
 	void *addr;
 
 	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
 		return NULL;
+
+	size = PAGE_ALIGN(size);
+
 	/*
 	 * Some drivers rely on this, and we probably don't want the
 	 * possibility of stale kernel data being read by devices anyway.
@@ -566,7 +562,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		struct page **pages;
 		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
 
-		pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle,
+		pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
 					flush_page);
 		if (!pages)
 			return NULL;
@@ -574,7 +570,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
 					      __builtin_return_address(0));
 		if (!addr)
-			iommu_dma_free(dev, pages, size, handle);
+			iommu_dma_free(dev, pages, iosize, handle);
 	} else {
 		struct page *page;
 		/*
@@ -591,7 +587,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		if (!addr)
 			return NULL;
 
-		*handle = iommu_dma_map_page(dev, page, 0, size, ioprot);
+		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
 		if (iommu_dma_mapping_error(dev, *handle)) {
 			if (coherent)
 				__free_pages(page, get_order(size));
@@ -606,6 +602,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			       dma_addr_t handle, struct dma_attrs *attrs)
 {
+	size_t iosize = size;
+
+	size = PAGE_ALIGN(size);
 	/*
 	 * @cpu_addr will be one of 3 things depending on how it was allocated:
 	 * - A remapped array of pages from iommu_dma_alloc(), for all
@@ -617,17 +616,17 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	 * Hence how dodgy the below logic looks...
 	 */
 	if (__in_atomic_pool(cpu_addr, size)) {
-		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
 		__free_from_pool(cpu_addr, size);
 	} else if (is_vmalloc_addr(cpu_addr)){
 		struct vm_struct *area = find_vm_area(cpu_addr);
 
 		if (WARN_ON(!area || !area->pages))
 			return;
-		iommu_dma_free(dev, area->pages, size, &handle);
+		iommu_dma_free(dev, area->pages, iosize, &handle);
 		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
 	} else {
-		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
 		__free_pages(virt_to_page(cpu_addr), get_order(size));
 	}
 }
@@ -984,8 +983,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			struct iommu_ops *iommu, bool coherent)
 {
-	if (!acpi_disabled && !dev->archdata.dma_ops)
-		dev->archdata.dma_ops = dma_ops;
+	if (!dev->archdata.dma_ops)
+		dev->archdata.dma_ops = &swiotlb_dma_ops;
 
 	dev->archdata.dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
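
The iosize/size split in the alloc and free paths above preserves one invariant: the length handed to the IOMMU layer at unmap/free time must match the length that was mapped, while the CPU-side page allocator and vmap layer always work in whole pages. Roughly, with illustrative values (editor's sketch):

	#include <stddef.h>

	#define PAGE_SIZE	4096UL
	#define PAGE_ALIGN(s)	(((s) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	/* For a 6000-byte request: iosize stays 6000 (what the IOMMU
	 * mapped and must later unmap), size becomes 8192 (what the
	 * page allocator and remap layer actually handed out). */
	static void split_sizes(size_t req, size_t *iosize, size_t *size)
	{
		*iosize = req;
		*size = PAGE_ALIGN(req);
	}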
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e3f563c81c48..abb66f84d4ac 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -362,8 +362,8 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
 	 * for now. This will get more fine grained later once all memory
 	 * is mapped
 	 */
-	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+	unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
+	unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
 
 	if (end < kernel_x_start) {
 		create_mapping(start, __phys_to_virt(start),
@@ -451,18 +451,18 @@ static void __init fixup_executable(void)
 {
 #ifdef CONFIG_DEBUG_RODATA
 	/* now that we are actually fully mapped, make the start/end more fine grained */
-	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+	if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
 		unsigned long aligned_start = round_down(__pa(_stext),
-							 SECTION_SIZE);
+							 SWAPPER_BLOCK_SIZE);
 
 		create_mapping(aligned_start, __phys_to_virt(aligned_start),
 			       __pa(_stext) - aligned_start,
 			       PAGE_KERNEL);
 	}
 
-	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+	if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
 		unsigned long aligned_end = round_up(__pa(__init_end),
-						     SECTION_SIZE);
+						     SWAPPER_BLOCK_SIZE);
 		create_mapping(__pa(__init_end), (unsigned long)__init_end,
 			       aligned_end - __pa(__init_end),
 			       PAGE_KERNEL);
@@ -475,7 +475,7 @@ void mark_rodata_ro(void)
 {
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
 			    (unsigned long)_etext - (unsigned long)_stext,
-			    PAGE_KERNEL_EXEC | PTE_RDONLY);
+			    PAGE_KERNEL_ROX);
 
 }
 #endif
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index cf3c7d4a1b58..d6a53ef2350b 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -50,7 +50,7 @@ static const int bpf2a64[] = {
 	[BPF_REG_8] = A64_R(21),
 	[BPF_REG_9] = A64_R(22),
 	/* read-only frame pointer to access stack */
-	[BPF_REG_FP] = A64_FP,
+	[BPF_REG_FP] = A64_R(25),
 	/* temporary register for internal BPF JIT */
 	[TMP_REG_1] = A64_R(23),
 	[TMP_REG_2] = A64_R(24),
@@ -155,18 +155,49 @@ static void build_prologue(struct jit_ctx *ctx)
 		stack_size += 4; /* extra for skb_copy_bits buffer */
 	stack_size = STACK_ALIGN(stack_size);
 
+	/*
+	 * BPF prog stack layout
+	 *
+	 *                         high
+	 * original A64_SP =>   0:+-----+ BPF prologue
+	 *                        |FP/LR|
+	 * current A64_FP =>  -16:+-----+
+	 *                        | ... | callee saved registers
+	 *                        +-----+
+	 *                        |     | x25/x26
+	 * BPF fp register => -80:+-----+
+	 *                        |     |
+	 *                        | ... | BPF prog stack
+	 *                        |     |
+	 *                        |     |
+	 * current A64_SP =>      +-----+
+	 *                        |     |
+	 *                        | ... | Function call stack
+	 *                        |     |
+	 *                        +-----+
+	 *                          low
+	 *
+	 */
+
+	/* Save FP and LR registers to stay aligned with ARM64 AAPCS */
+	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+
 	/* Save callee-saved register */
 	emit(A64_PUSH(r6, r7, A64_SP), ctx);
 	emit(A64_PUSH(r8, r9, A64_SP), ctx);
 	if (ctx->tmp_used)
 		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
 
-	/* Set up BPF stack */
-	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+	/* Save fp (x25) and x26. SP requires 16 bytes alignment */
+	emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
 
-	/* Set up frame pointer */
+	/* Set up BPF prog stack base register (x25) */
 	emit(A64_MOV(1, fp, A64_SP), ctx);
 
+	/* Set up function call stack */
+	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+
 	/* Clear registers A and X */
 	emit_a64_mov_i64(ra, 0, ctx);
 	emit_a64_mov_i64(rx, 0, ctx);
@@ -190,14 +221,17 @@ static void build_epilogue(struct jit_ctx *ctx)
 	/* We're done with BPF stack */
 	emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
 
+	/* Restore fp (x25) and x26 */
+	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
+
 	/* Restore callee-saved register */
 	if (ctx->tmp_used)
 		emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
 	emit(A64_POP(r8, r9, A64_SP), ctx);
 	emit(A64_POP(r6, r7, A64_SP), ctx);
 
-	/* Restore frame pointer */
-	emit(A64_MOV(1, fp, A64_SP), ctx);
+	/* Restore FP/LR registers */
+	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
 
 	/* Set return value */
 	emit(A64_MOV(1, A64_R(0), r0), ctx);
@@ -758,7 +792,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 	if (bpf_jit_enable > 1)
 		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
 
-	bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
+	bpf_flush_icache(header, ctx.image + ctx.idx);
 
 	set_memory_ro((unsigned long)header, header->pages);
 	prog->bpf_func = (void *)ctx.image;