aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 16:57:43 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 16:57:43 -0500
commit0aebc6a440b942df6221a7765f077f02217e0114 (patch)
treeb5004b4194cc7e69c76465287f34263bf0b5aab5
parent72906f38934a49faf4d2d38ea9ae32adcf7d5d0c (diff)
parentec89ab50a03a33a4a648869e868b1964354fb2d1 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: "The main theme of this pull request is security covering variants 2 and 3 for arm64. I expect to send additional patches next week covering an improved firmware interface (requires firmware changes) for variant 2 and way for KPTI to be disabled on unaffected CPUs (Cavium's ThunderX doesn't work properly with KPTI enabled because of a hardware erratum). Summary: - Security mitigations: - variant 2: invalidate the branch predictor with a call to secure firmware - variant 3: implement KPTI for arm64 - 52-bit physical address support for arm64 (ARMv8.2) - arm64 support for RAS (firmware first only) and SDEI (software delegated exception interface; allows firmware to inject a RAS error into the OS) - perf support for the ARM DynamIQ Shared Unit PMU - CPUID and HWCAP bits updated for new floating point multiplication instructions in ARMv8.4 - remove some virtual memory layout printks during boot - fix initial page table creation to cope with larger than 32M kernel images when 16K pages are enabled" * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (104 commits) arm64: Fix TTBR + PAN + 52-bit PA logic in cpu_do_switch_mm arm64: Turn on KPTI only on CPUs that need it arm64: Branch predictor hardening for Cavium ThunderX2 arm64: Run enable method for errata work arounds on late CPUs arm64: Move BP hardening to check_and_switch_context arm64: mm: ignore memory above supported physical address size arm64: kpti: Fix the interaction between ASID switching and software PAN KVM: arm64: Emulate RAS error registers and set HCR_EL2's TERR & TEA KVM: arm64: Handle RAS SErrors from EL2 on guest exit KVM: arm64: Handle RAS SErrors from EL1 on guest exit KVM: arm64: Save ESR_EL2 on guest SError KVM: arm64: Save/Restore guest DISR_EL1 KVM: arm64: Set an impdef ESR for Virtual-SError using VSESR_EL2. 
KVM: arm/arm64: mask/unmask daif around VHE guests arm64: kernel: Prepare for a DISR user arm64: Unconditionally enable IESB on exception entry/return for firmware-first arm64: kernel: Survive corrected RAS errors notified by SError arm64: cpufeature: Detect CPU RAS Extentions arm64: sysreg: Move to use definitions for all the SCTLR bits arm64: cpufeature: __this_cpu_has_cap() shouldn't stop early ...
-rw-r--r--Documentation/arm64/cpu-feature-registers.txt4
-rw-r--r--Documentation/arm64/elf_hwcaps.txt4
-rw-r--r--Documentation/arm64/silicon-errata.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/sdei.txt42
-rw-r--r--Documentation/perf/arm_dsu_pmu.txt28
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/arm/include/asm/kvm_host.h5
-rw-r--r--arch/arm/include/asm/kvm_mmu.h17
-rw-r--r--arch/arm64/Kconfig91
-rw-r--r--arch/arm64/include/asm/alternative.h2
-rw-r--r--arch/arm64/include/asm/arm_dsu_pmu.h129
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h42
-rw-r--r--arch/arm64/include/asm/assembler.h75
-rw-r--r--arch/arm64/include/asm/cpucaps.h6
-rw-r--r--arch/arm64/include/asm/cputype.h9
-rw-r--r--arch/arm64/include/asm/efi.h12
-rw-r--r--arch/arm64/include/asm/esr.h20
-rw-r--r--arch/arm64/include/asm/exception.h14
-rw-r--r--arch/arm64/include/asm/fixmap.h5
-rw-r--r--arch/arm64/include/asm/fpsimd.h2
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h59
-rw-r--r--arch/arm64/include/asm/kvm_arm.h2
-rw-r--r--arch/arm64/include/asm/kvm_asm.h2
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h17
-rw-r--r--arch/arm64/include/asm/kvm_host.h19
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h59
-rw-r--r--arch/arm64/include/asm/mmu.h49
-rw-r--r--arch/arm64/include/asm/mmu_context.h27
-rw-r--r--arch/arm64/include/asm/percpu.h11
-rw-r--r--arch/arm64/include/asm/pgalloc.h6
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h32
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h21
-rw-r--r--arch/arm64/include/asm/pgtable.h57
-rw-r--r--arch/arm64/include/asm/proc-fns.h6
-rw-r--r--arch/arm64/include/asm/processor.h1
-rw-r--r--arch/arm64/include/asm/sdei.h57
-rw-r--r--arch/arm64/include/asm/sections.h1
-rw-r--r--arch/arm64/include/asm/sparsemem.h2
-rw-r--r--arch/arm64/include/asm/stacktrace.h3
-rw-r--r--arch/arm64/include/asm/sysreg.h92
-rw-r--r--arch/arm64/include/asm/tlbflush.h16
-rw-r--r--arch/arm64/include/asm/traps.h54
-rw-r--r--arch/arm64/include/asm/uaccess.h40
-rw-r--r--arch/arm64/include/asm/vmap_stack.h28
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/kernel/Makefile5
-rw-r--r--arch/arm64/kernel/acpi.c2
-rw-r--r--arch/arm64/kernel/alternative.c9
-rw-r--r--arch/arm64/kernel/asm-offsets.c12
-rw-r--r--arch/arm64/kernel/bpi.S87
-rw-r--r--arch/arm64/kernel/cpu_errata.c192
-rw-r--r--arch/arm64/kernel/cpufeature.c146
-rw-r--r--arch/arm64/kernel/cpuidle.c8
-rw-r--r--arch/arm64/kernel/cpuinfo.c1
-rw-r--r--arch/arm64/kernel/entry.S396
-rw-r--r--arch/arm64/kernel/fpsimd.c4
-rw-r--r--arch/arm64/kernel/head.S245
-rw-r--r--arch/arm64/kernel/hibernate-asm.S12
-rw-r--r--arch/arm64/kernel/hibernate.c5
-rw-r--r--arch/arm64/kernel/irq.c13
-rw-r--r--arch/arm64/kernel/process.c12
-rw-r--r--arch/arm64/kernel/sdei.c235
-rw-r--r--arch/arm64/kernel/signal.c7
-rw-r--r--arch/arm64/kernel/signal32.c5
-rw-r--r--arch/arm64/kernel/smp.c11
-rw-r--r--arch/arm64/kernel/suspend.c4
-rw-r--r--arch/arm64/kernel/topology.c16
-rw-r--r--arch/arm64/kernel/traps.c51
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S27
-rw-r--r--arch/arm64/kvm/handle_exit.c32
-rw-r--r--arch/arm64/kvm/hyp-init.S30
-rw-r--r--arch/arm64/kvm/hyp/entry.S35
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S18
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c2
-rw-r--r--arch/arm64/kvm/hyp/switch.c60
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c22
-rw-r--r--arch/arm64/kvm/inject_fault.c13
-rw-r--r--arch/arm64/kvm/sys_regs.c11
-rw-r--r--arch/arm64/lib/clear_user.S4
-rw-r--r--arch/arm64/lib/copy_from_user.S4
-rw-r--r--arch/arm64/lib/copy_in_user.S4
-rw-r--r--arch/arm64/lib/copy_to_user.S4
-rw-r--r--arch/arm64/lib/tishift.S8
-rw-r--r--arch/arm64/mm/cache.S4
-rw-r--r--arch/arm64/mm/context.c67
-rw-r--r--arch/arm64/mm/fault.c17
-rw-r--r--arch/arm64/mm/init.c46
-rw-r--r--arch/arm64/mm/mmu.c47
-rw-r--r--arch/arm64/mm/pgd.c8
-rw-r--r--arch/arm64/mm/proc.S66
-rw-r--r--arch/arm64/xen/hypercall.S4
-rw-r--r--drivers/firmware/Kconfig8
-rw-r--r--drivers/firmware/Makefile1
-rw-r--r--drivers/firmware/arm_sdei.c1092
-rw-r--r--drivers/firmware/psci.c2
-rw-r--r--drivers/hwtracing/coresight/of_coresight.c15
-rw-r--r--drivers/irqchip/irq-gic-v3.c29
-rw-r--r--drivers/of/base.c26
-rw-r--r--drivers/perf/Kconfig9
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/arm_dsu_pmu.c843
-rw-r--r--drivers/perf/arm_pmu_platform.c15
-rw-r--r--drivers/perf/arm_spe_pmu.c9
-rw-r--r--include/linux/arm_sdei.h79
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/cpuidle.h40
-rw-r--r--include/linux/of.h7
-rw-r--r--include/linux/psci.h4
-rw-r--r--include/uapi/linux/arm_sdei.h73
-rw-r--r--kernel/events/core.c1
-rw-r--r--virt/kvm/arm/arm.c35
-rw-r--r--virt/kvm/arm/mmu.c12
113 files changed, 4941 insertions, 579 deletions
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
index bd9b3faab2c4..a70090b28b07 100644
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,7 +110,9 @@ infrastructure:
110 x--------------------------------------------------x 110 x--------------------------------------------------x
111 | Name | bits | visible | 111 | Name | bits | visible |
112 |--------------------------------------------------| 112 |--------------------------------------------------|
113 | RES0 | [63-48] | n | 113 | RES0 | [63-52] | n |
114 |--------------------------------------------------|
115 | FHM | [51-48] | y |
114 |--------------------------------------------------| 116 |--------------------------------------------------|
115 | DP | [47-44] | y | 117 | DP | [47-44] | y |
116 |--------------------------------------------------| 118 |--------------------------------------------------|
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index 89edba12a9e0..57324ee55ecc 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -158,3 +158,7 @@ HWCAP_SHA512
158HWCAP_SVE 158HWCAP_SVE
159 159
160 Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001. 160 Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
161
162HWCAP_ASIMDFHM
163
164 Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index fc1c884fea10..c1d520de6dfe 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -72,7 +72,7 @@ stable kernels.
72| Hisilicon | Hip0{6,7} | #161010701 | N/A | 72| Hisilicon | Hip0{6,7} | #161010701 | N/A |
73| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 | 73| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
74| | | | | 74| | | | |
75| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | 75| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
76| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | 76| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
77| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | 77| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
78| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 | 78| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
diff --git a/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
new file mode 100644
index 000000000000..6efabba530f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
@@ -0,0 +1,27 @@
1* ARM DynamIQ Shared Unit (DSU) Performance Monitor Unit (PMU)
2
3ARM DyanmIQ Shared Unit (DSU) integrates one or more CPU cores
4with a shared L3 memory system, control logic and external interfaces to
5form a multicore cluster. The PMU enables to gather various statistics on
6the operations of the DSU. The PMU provides independent 32bit counters that
7can count any of the supported events, along with a 64bit cycle counter.
8The PMU is accessed via CPU system registers and has no MMIO component.
9
10** DSU PMU required properties:
11
12- compatible : should be one of :
13
14 "arm,dsu-pmu"
15
16- interrupts : Exactly 1 SPI must be listed.
17
18- cpus : List of phandles for the CPUs connected to this DSU instance.
19
20
21** Example:
22
23dsu-pmu-0 {
24 compatible = "arm,dsu-pmu";
25 interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>;
26 cpus = <&cpu_0>, <&cpu_1>;
27};
diff --git a/Documentation/devicetree/bindings/arm/firmware/sdei.txt b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
new file mode 100644
index 000000000000..ee3f0ff49889
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
@@ -0,0 +1,42 @@
1* Software Delegated Exception Interface (SDEI)
2
3Firmware implementing the SDEI functions described in ARM document number
4ARM DEN 0054A ("Software Delegated Exception Interface") can be used by
5Linux to receive notification of events such as those generated by
6firmware-first error handling, or from an IRQ that has been promoted to
7a firmware-assisted NMI.
8
9The interface provides a number of API functions for registering callbacks
10and enabling/disabling events. Functions are invoked by trapping to the
11privilege level of the SDEI firmware (specified as part of the binding
12below) and passing arguments in a manner specified by the "SMC Calling
13Convention (ARM DEN 0028B):
14
15 r0 => 32-bit Function ID / return value
16 {r1 - r3} => Parameters
17
18Note that the immediate field of the trapping instruction must be set
19to #0.
20
21The SDEI_EVENT_REGISTER function registers a callback in the kernel
22text to handle the specified event number.
23
24The sdei node should be a child node of '/firmware' and have required
25properties:
26
27 - compatible : should contain:
28 * "arm,sdei-1.0" : For implementations complying to SDEI version 1.x.
29
30 - method : The method of calling the SDEI firmware. Permitted
31 values are:
32 * "smc" : SMC #0, with the register assignments specified in this
33 binding.
34 * "hvc" : HVC #0, with the register assignments specified in this
35 binding.
36Example:
37 firmware {
38 sdei {
39 compatible = "arm,sdei-1.0";
40 method = "smc";
41 };
42 };
diff --git a/Documentation/perf/arm_dsu_pmu.txt b/Documentation/perf/arm_dsu_pmu.txt
new file mode 100644
index 000000000000..d611e15f5add
--- /dev/null
+++ b/Documentation/perf/arm_dsu_pmu.txt
@@ -0,0 +1,28 @@
1ARM DynamIQ Shared Unit (DSU) PMU
2==================================
3
4ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
5control logic and external interfaces to form a multicore cluster. The PMU
6allows counting the various events related to the L3 cache, Snoop Control Unit
7etc, using 32bit independent counters. It also provides a 64bit cycle counter.
8
9The PMU can only be accessed via CPU system registers and are common to the
10cores connected to the same DSU. Like most of the other uncore PMUs, DSU
11PMU doesn't support process specific events and cannot be used in sampling mode.
12
13The DSU provides a bitmap for a subset of implemented events via hardware
14registers. There is no way for the driver to determine if the other events
15are available or not. Hence the driver exposes only those events advertised
16by the DSU, in "events" directory under :
17
18 /sys/bus/event_sources/devices/arm_dsu_<N>/
19
20The user should refer to the TRM of the product to figure out the supported events
21and use the raw event code for the unlisted events.
22
23The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
24
25
26e.g usage :
27
28 perf stat -a -e arm_dsu_0/cycles/
diff --git a/MAINTAINERS b/MAINTAINERS
index 217a8759e897..98ee6febcf45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12645,6 +12645,15 @@ L: linux-media@vger.kernel.org
12645S: Supported 12645S: Supported
12646F: drivers/media/pci/solo6x10/ 12646F: drivers/media/pci/solo6x10/
12647 12647
12648SOFTWARE DELEGATED EXCEPTION INTERFACE (SDEI)
12649M: James Morse <james.morse@arm.com>
12650L: linux-arm-kernel@lists.infradead.org
12651S: Maintained
12652F: Documentation/devicetree/bindings/arm/firmware/sdei.txt
12653F: drivers/firmware/arm_sdei.c
12654F: include/linux/sdei.h
12655F: include/uapi/linux/sdei.h
12656
12648SOFTWARE RAID (Multiple Disks) SUPPORT 12657SOFTWARE RAID (Multiple Disks) SUPPORT
12649M: Shaohua Li <shli@kernel.org> 12658M: Shaohua Li <shli@kernel.org>
12650L: linux-raid@vger.kernel.org 12659L: linux-raid@vger.kernel.org
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index a9f7d3f47134..acbf9ec7b396 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -238,6 +238,9 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
238int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, 238int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
239 int exception_index); 239 int exception_index);
240 240
241static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
242 int exception_index) {}
243
241static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, 244static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
242 unsigned long hyp_stack_ptr, 245 unsigned long hyp_stack_ptr,
243 unsigned long vector_ptr) 246 unsigned long vector_ptr)
@@ -301,4 +304,6 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
301/* All host FP/SIMD state is restored on guest exit, so nothing to save: */ 304/* All host FP/SIMD state is restored on guest exit, so nothing to save: */
302static inline void kvm_fpsimd_flush_cpu_state(void) {} 305static inline void kvm_fpsimd_flush_cpu_state(void) {}
303 306
307static inline void kvm_arm_vhe_guest_enter(void) {}
308static inline void kvm_arm_vhe_guest_exit(void) {}
304#endif /* __ARM_KVM_HOST_H__ */ 309#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index fa6f2174276b..a2d176a308bd 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void)
211 return false; 211 return false;
212} 212}
213 213
214static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
215{
216 return PTRS_PER_PGD;
217}
218
214static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, 219static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
215 pgd_t *hyp_pgd, 220 pgd_t *hyp_pgd,
216 pgd_t *merged_hyp_pgd, 221 pgd_t *merged_hyp_pgd,
@@ -221,6 +226,18 @@ static inline unsigned int kvm_get_vmid_bits(void)
221 return 8; 226 return 8;
222} 227}
223 228
229static inline void *kvm_get_hyp_vector(void)
230{
231 return kvm_ksym_ref(__kvm_hyp_vector);
232}
233
234static inline int kvm_map_vectors(void)
235{
236 return 0;
237}
238
239#define kvm_phys_to_vttbr(addr) (addr)
240
224#endif /* !__ASSEMBLY__ */ 241#endif /* !__ASSEMBLY__ */
225 242
226#endif /* __ARM_KVM_MMU_H__ */ 243#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c9a7e9e1414f..b488076d63c2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -522,20 +522,13 @@ config CAVIUM_ERRATUM_30115
522config QCOM_FALKOR_ERRATUM_1003 522config QCOM_FALKOR_ERRATUM_1003
523 bool "Falkor E1003: Incorrect translation due to ASID change" 523 bool "Falkor E1003: Incorrect translation due to ASID change"
524 default y 524 default y
525 select ARM64_PAN if ARM64_SW_TTBR0_PAN
526 help 525 help
527 On Falkor v1, an incorrect ASID may be cached in the TLB when ASID 526 On Falkor v1, an incorrect ASID may be cached in the TLB when ASID
528 and BADDR are changed together in TTBRx_EL1. The workaround for this 527 and BADDR are changed together in TTBRx_EL1. Since we keep the ASID
529 issue is to use a reserved ASID in cpu_do_switch_mm() before 528 in TTBR1_EL1, this situation only occurs in the entry trampoline and
530 switching to the new ASID. Saying Y here selects ARM64_PAN if 529 then only for entries in the walk cache, since the leaf translation
531 ARM64_SW_TTBR0_PAN is selected. This is done because implementing and 530 is unchanged. Work around the erratum by invalidating the walk cache
532 maintaining the E1003 workaround in the software PAN emulation code 531 entries for the trampoline before entering the kernel proper.
533 would be an unnecessary complication. The affected Falkor v1 CPU
534 implements ARMv8.1 hardware PAN support and using hardware PAN
535 support versus software PAN emulation is mutually exclusive at
536 runtime.
537
538 If unsure, say Y.
539 532
540config QCOM_FALKOR_ERRATUM_1009 533config QCOM_FALKOR_ERRATUM_1009
541 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" 534 bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
@@ -656,6 +649,35 @@ config ARM64_VA_BITS
656 default 47 if ARM64_VA_BITS_47 649 default 47 if ARM64_VA_BITS_47
657 default 48 if ARM64_VA_BITS_48 650 default 48 if ARM64_VA_BITS_48
658 651
652choice
653 prompt "Physical address space size"
654 default ARM64_PA_BITS_48
655 help
656 Choose the maximum physical address range that the kernel will
657 support.
658
659config ARM64_PA_BITS_48
660 bool "48-bit"
661
662config ARM64_PA_BITS_52
663 bool "52-bit (ARMv8.2)"
664 depends on ARM64_64K_PAGES
665 depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
666 help
667 Enable support for a 52-bit physical address space, introduced as
668 part of the ARMv8.2-LPA extension.
669
670 With this enabled, the kernel will also continue to work on CPUs that
671 do not support ARMv8.2-LPA, but with some added memory overhead (and
672 minor performance overhead).
673
674endchoice
675
676config ARM64_PA_BITS
677 int
678 default 48 if ARM64_PA_BITS_48
679 default 52 if ARM64_PA_BITS_52
680
659config CPU_BIG_ENDIAN 681config CPU_BIG_ENDIAN
660 bool "Build big-endian kernel" 682 bool "Build big-endian kernel"
661 help 683 help
@@ -850,6 +872,35 @@ config FORCE_MAX_ZONEORDER
850 However for 4K, we choose a higher default value, 11 as opposed to 10, giving us 872 However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
851 4M allocations matching the default size used by generic code. 873 4M allocations matching the default size used by generic code.
852 874
875config UNMAP_KERNEL_AT_EL0
876 bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
877 default y
878 help
879 Speculation attacks against some high-performance processors can
880 be used to bypass MMU permission checks and leak kernel data to
881 userspace. This can be defended against by unmapping the kernel
882 when running in userspace, mapping it back in on exception entry
883 via a trampoline page in the vector table.
884
885 If unsure, say Y.
886
887config HARDEN_BRANCH_PREDICTOR
888 bool "Harden the branch predictor against aliasing attacks" if EXPERT
889 default y
890 help
891 Speculation attacks against some high-performance processors rely on
892 being able to manipulate the branch predictor for a victim context by
893 executing aliasing branches in the attacker context. Such attacks
894 can be partially mitigated against by clearing internal branch
895 predictor state and limiting the prediction logic in some situations.
896
897 This config option will take CPU-specific actions to harden the
898 branch predictor against aliasing attacks and may rely on specific
899 instruction sequences or control bits being set by the system
900 firmware.
901
902 If unsure, say Y.
903
853menuconfig ARMV8_DEPRECATED 904menuconfig ARMV8_DEPRECATED
854 bool "Emulate deprecated/obsolete ARMv8 instructions" 905 bool "Emulate deprecated/obsolete ARMv8 instructions"
855 depends on COMPAT 906 depends on COMPAT
@@ -1021,6 +1072,22 @@ config ARM64_PMEM
1021 operations if DC CVAP is not supported (following the behaviour of 1072 operations if DC CVAP is not supported (following the behaviour of
1022 DC CVAP itself if the system does not define a point of persistence). 1073 DC CVAP itself if the system does not define a point of persistence).
1023 1074
1075config ARM64_RAS_EXTN
1076 bool "Enable support for RAS CPU Extensions"
1077 default y
1078 help
1079 CPUs that support the Reliability, Availability and Serviceability
1080 (RAS) Extensions, part of ARMv8.2 are able to track faults and
1081 errors, classify them and report them to software.
1082
1083 On CPUs with these extensions system software can use additional
1084 barriers to determine if faults are pending and read the
1085 classification from a new set of registers.
1086
1087 Selecting this feature will allow the kernel to use these barriers
1088 and access the new registers if the system supports the extension.
1089 Platform RAS features may additionally depend on firmware support.
1090
1024endmenu 1091endmenu
1025 1092
1026config ARM64_SVE 1093config ARM64_SVE
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 4a85c6952a22..669028172fd6 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -12,6 +12,8 @@
12#include <linux/stddef.h> 12#include <linux/stddef.h>
13#include <linux/stringify.h> 13#include <linux/stringify.h>
14 14
15extern int alternatives_applied;
16
15struct alt_instr { 17struct alt_instr {
16 s32 orig_offset; /* offset to original instruction */ 18 s32 orig_offset; /* offset to original instruction */
17 s32 alt_offset; /* offset to replacement instruction */ 19 s32 alt_offset; /* offset to replacement instruction */
diff --git a/arch/arm64/include/asm/arm_dsu_pmu.h b/arch/arm64/include/asm/arm_dsu_pmu.h
new file mode 100644
index 000000000000..82e5cc3356bf
--- /dev/null
+++ b/arch/arm64/include/asm/arm_dsu_pmu.h
@@ -0,0 +1,129 @@
1/*
2 * ARM DynamIQ Shared Unit (DSU) PMU Low level register access routines.
3 *
4 * Copyright (C) ARM Limited, 2017.
5 *
6 * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * version 2, as published by the Free Software Foundation.
11 */
12
13#include <linux/bitops.h>
14#include <linux/build_bug.h>
15#include <linux/compiler.h>
16#include <linux/types.h>
17#include <asm/barrier.h>
18#include <asm/sysreg.h>
19
20
21#define CLUSTERPMCR_EL1 sys_reg(3, 0, 15, 5, 0)
22#define CLUSTERPMCNTENSET_EL1 sys_reg(3, 0, 15, 5, 1)
23#define CLUSTERPMCNTENCLR_EL1 sys_reg(3, 0, 15, 5, 2)
24#define CLUSTERPMOVSSET_EL1 sys_reg(3, 0, 15, 5, 3)
25#define CLUSTERPMOVSCLR_EL1 sys_reg(3, 0, 15, 5, 4)
26#define CLUSTERPMSELR_EL1 sys_reg(3, 0, 15, 5, 5)
27#define CLUSTERPMINTENSET_EL1 sys_reg(3, 0, 15, 5, 6)
28#define CLUSTERPMINTENCLR_EL1 sys_reg(3, 0, 15, 5, 7)
29#define CLUSTERPMCCNTR_EL1 sys_reg(3, 0, 15, 6, 0)
30#define CLUSTERPMXEVTYPER_EL1 sys_reg(3, 0, 15, 6, 1)
31#define CLUSTERPMXEVCNTR_EL1 sys_reg(3, 0, 15, 6, 2)
32#define CLUSTERPMMDCR_EL1 sys_reg(3, 0, 15, 6, 3)
33#define CLUSTERPMCEID0_EL1 sys_reg(3, 0, 15, 6, 4)
34#define CLUSTERPMCEID1_EL1 sys_reg(3, 0, 15, 6, 5)
35
36static inline u32 __dsu_pmu_read_pmcr(void)
37{
38 return read_sysreg_s(CLUSTERPMCR_EL1);
39}
40
41static inline void __dsu_pmu_write_pmcr(u32 val)
42{
43 write_sysreg_s(val, CLUSTERPMCR_EL1);
44 isb();
45}
46
47static inline u32 __dsu_pmu_get_reset_overflow(void)
48{
49 u32 val = read_sysreg_s(CLUSTERPMOVSCLR_EL1);
50 /* Clear the bit */
51 write_sysreg_s(val, CLUSTERPMOVSCLR_EL1);
52 isb();
53 return val;
54}
55
56static inline void __dsu_pmu_select_counter(int counter)
57{
58 write_sysreg_s(counter, CLUSTERPMSELR_EL1);
59 isb();
60}
61
62static inline u64 __dsu_pmu_read_counter(int counter)
63{
64 __dsu_pmu_select_counter(counter);
65 return read_sysreg_s(CLUSTERPMXEVCNTR_EL1);
66}
67
68static inline void __dsu_pmu_write_counter(int counter, u64 val)
69{
70 __dsu_pmu_select_counter(counter);
71 write_sysreg_s(val, CLUSTERPMXEVCNTR_EL1);
72 isb();
73}
74
75static inline void __dsu_pmu_set_event(int counter, u32 event)
76{
77 __dsu_pmu_select_counter(counter);
78 write_sysreg_s(event, CLUSTERPMXEVTYPER_EL1);
79 isb();
80}
81
82static inline u64 __dsu_pmu_read_pmccntr(void)
83{
84 return read_sysreg_s(CLUSTERPMCCNTR_EL1);
85}
86
87static inline void __dsu_pmu_write_pmccntr(u64 val)
88{
89 write_sysreg_s(val, CLUSTERPMCCNTR_EL1);
90 isb();
91}
92
93static inline void __dsu_pmu_disable_counter(int counter)
94{
95 write_sysreg_s(BIT(counter), CLUSTERPMCNTENCLR_EL1);
96 isb();
97}
98
99static inline void __dsu_pmu_enable_counter(int counter)
100{
101 write_sysreg_s(BIT(counter), CLUSTERPMCNTENSET_EL1);
102 isb();
103}
104
105static inline void __dsu_pmu_counter_interrupt_enable(int counter)
106{
107 write_sysreg_s(BIT(counter), CLUSTERPMINTENSET_EL1);
108 isb();
109}
110
111static inline void __dsu_pmu_counter_interrupt_disable(int counter)
112{
113 write_sysreg_s(BIT(counter), CLUSTERPMINTENCLR_EL1);
114 isb();
115}
116
117
118static inline u32 __dsu_pmu_read_pmceid(int n)
119{
120 switch (n) {
121 case 0:
122 return read_sysreg_s(CLUSTERPMCEID0_EL1);
123 case 1:
124 return read_sysreg_s(CLUSTERPMCEID1_EL1);
125 default:
126 BUILD_BUG();
127 return 0;
128 }
129}
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index b3da6c886835..4128bec033f6 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -4,6 +4,7 @@
4 4
5#include <asm/alternative.h> 5#include <asm/alternative.h>
6#include <asm/kernel-pgtable.h> 6#include <asm/kernel-pgtable.h>
7#include <asm/mmu.h>
7#include <asm/sysreg.h> 8#include <asm/sysreg.h>
8#include <asm/assembler.h> 9#include <asm/assembler.h>
9 10
@@ -12,52 +13,63 @@
12 */ 13 */
13#ifdef CONFIG_ARM64_SW_TTBR0_PAN 14#ifdef CONFIG_ARM64_SW_TTBR0_PAN
14 .macro __uaccess_ttbr0_disable, tmp1 15 .macro __uaccess_ttbr0_disable, tmp1
15 mrs \tmp1, ttbr1_el1 // swapper_pg_dir 16 mrs \tmp1, ttbr1_el1 // swapper_pg_dir
16 add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir 17 bic \tmp1, \tmp1, #TTBR_ASID_MASK
17 msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 18 sub \tmp1, \tmp1, #RESERVED_TTBR0_SIZE // reserved_ttbr0 just before swapper_pg_dir
19 msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
20 isb
21 add \tmp1, \tmp1, #RESERVED_TTBR0_SIZE
22 msr ttbr1_el1, \tmp1 // set reserved ASID
18 isb 23 isb
19 .endm 24 .endm
20 25
21 .macro __uaccess_ttbr0_enable, tmp1 26 .macro __uaccess_ttbr0_enable, tmp1, tmp2
22 get_thread_info \tmp1 27 get_thread_info \tmp1
23 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 28 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
29 mrs \tmp2, ttbr1_el1
30 extr \tmp2, \tmp2, \tmp1, #48
31 ror \tmp2, \tmp2, #16
32 msr ttbr1_el1, \tmp2 // set the active ASID
33 isb
24 msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 34 msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
25 isb 35 isb
26 .endm 36 .endm
27 37
28 .macro uaccess_ttbr0_disable, tmp1 38 .macro uaccess_ttbr0_disable, tmp1, tmp2
29alternative_if_not ARM64_HAS_PAN 39alternative_if_not ARM64_HAS_PAN
40 save_and_disable_irq \tmp2 // avoid preemption
30 __uaccess_ttbr0_disable \tmp1 41 __uaccess_ttbr0_disable \tmp1
42 restore_irq \tmp2
31alternative_else_nop_endif 43alternative_else_nop_endif
32 .endm 44 .endm
33 45
34 .macro uaccess_ttbr0_enable, tmp1, tmp2 46 .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
35alternative_if_not ARM64_HAS_PAN 47alternative_if_not ARM64_HAS_PAN
36 save_and_disable_irq \tmp2 // avoid preemption 48 save_and_disable_irq \tmp3 // avoid preemption
37 __uaccess_ttbr0_enable \tmp1 49 __uaccess_ttbr0_enable \tmp1, \tmp2
38 restore_irq \tmp2 50 restore_irq \tmp3
39alternative_else_nop_endif 51alternative_else_nop_endif
40 .endm 52 .endm
41#else 53#else
42 .macro uaccess_ttbr0_disable, tmp1 54 .macro uaccess_ttbr0_disable, tmp1, tmp2
43 .endm 55 .endm
44 56
45 .macro uaccess_ttbr0_enable, tmp1, tmp2 57 .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
46 .endm 58 .endm
47#endif 59#endif
48 60
49/* 61/*
50 * These macros are no-ops when UAO is present. 62 * These macros are no-ops when UAO is present.
51 */ 63 */
52 .macro uaccess_disable_not_uao, tmp1 64 .macro uaccess_disable_not_uao, tmp1, tmp2
53 uaccess_ttbr0_disable \tmp1 65 uaccess_ttbr0_disable \tmp1, \tmp2
54alternative_if ARM64_ALT_PAN_NOT_UAO 66alternative_if ARM64_ALT_PAN_NOT_UAO
55 SET_PSTATE_PAN(1) 67 SET_PSTATE_PAN(1)
56alternative_else_nop_endif 68alternative_else_nop_endif
57 .endm 69 .endm
58 70
59 .macro uaccess_enable_not_uao, tmp1, tmp2 71 .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
60 uaccess_ttbr0_enable \tmp1, \tmp2 72 uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
61alternative_if ARM64_ALT_PAN_NOT_UAO 73alternative_if ARM64_ALT_PAN_NOT_UAO
62 SET_PSTATE_PAN(0) 74 SET_PSTATE_PAN(0)
63alternative_else_nop_endif 75alternative_else_nop_endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8b168280976f..3873dd7b5a32 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -26,7 +26,6 @@
26#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
27#include <asm/cpufeature.h> 27#include <asm/cpufeature.h>
28#include <asm/debug-monitors.h> 28#include <asm/debug-monitors.h>
29#include <asm/mmu_context.h>
30#include <asm/page.h> 29#include <asm/page.h>
31#include <asm/pgtable-hwdef.h> 30#include <asm/pgtable-hwdef.h>
32#include <asm/ptrace.h> 31#include <asm/ptrace.h>
@@ -110,6 +109,13 @@
110 .endm 109 .endm
111 110
112/* 111/*
112 * RAS Error Synchronization barrier
113 */
114 .macro esb
115 hint #16
116 .endm
117
118/*
113 * NOP sequence 119 * NOP sequence
114 */ 120 */
115 .macro nops, num 121 .macro nops, num
@@ -255,7 +261,11 @@ lr .req x30 // link register
255#else 261#else
256 adr_l \dst, \sym 262 adr_l \dst, \sym
257#endif 263#endif
264alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
258 mrs \tmp, tpidr_el1 265 mrs \tmp, tpidr_el1
266alternative_else
267 mrs \tmp, tpidr_el2
268alternative_endif
259 add \dst, \dst, \tmp 269 add \dst, \dst, \tmp
260 .endm 270 .endm
261 271
@@ -266,7 +276,11 @@ lr .req x30 // link register
266 */ 276 */
267 .macro ldr_this_cpu dst, sym, tmp 277 .macro ldr_this_cpu dst, sym, tmp
268 adr_l \dst, \sym 278 adr_l \dst, \sym
279alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
269 mrs \tmp, tpidr_el1 280 mrs \tmp, tpidr_el1
281alternative_else
282 mrs \tmp, tpidr_el2
283alternative_endif
270 ldr \dst, [\dst, \tmp] 284 ldr \dst, [\dst, \tmp]
271 .endm 285 .endm
272 286
@@ -344,10 +358,26 @@ alternative_endif
344 * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map 358 * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
345 */ 359 */
346 .macro tcr_set_idmap_t0sz, valreg, tmpreg 360 .macro tcr_set_idmap_t0sz, valreg, tmpreg
347#ifndef CONFIG_ARM64_VA_BITS_48
348 ldr_l \tmpreg, idmap_t0sz 361 ldr_l \tmpreg, idmap_t0sz
349 bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH 362 bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
350#endif 363 .endm
364
365/*
366 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
367 * ID_AA64MMFR0_EL1.PARange value
368 *
369 * tcr: register with the TCR_ELx value to be updated
370 * pos: IPS or PS bitfield position
371 * tmp{0,1}: temporary registers
372 */
373 .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1
374 mrs \tmp0, ID_AA64MMFR0_EL1
375 // Narrow PARange to fit the PS field in TCR_ELx
376 ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
377 mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX
378 cmp \tmp0, \tmp1
379 csel \tmp0, \tmp1, \tmp0, hi
380 bfi \tcr, \tmp0, \pos, #3
351 .endm 381 .endm
352 382
353/* 383/*
@@ -478,37 +508,18 @@ alternative_endif
478 .endm 508 .endm
479 509
480/* 510/*
481 * Errata workaround prior to TTBR0_EL1 update 511 * Arrange a physical address in a TTBR register, taking care of 52-bit
512 * addresses.
482 * 513 *
483 * val: TTBR value with new BADDR, preserved 514 * phys: physical address, preserved
484 * tmp0: temporary register, clobbered 515 * ttbr: returns the TTBR value
485 * tmp1: other temporary register, clobbered
486 */ 516 */
487 .macro pre_ttbr0_update_workaround, val, tmp0, tmp1 517 .macro phys_to_ttbr, phys, ttbr
488#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 518#ifdef CONFIG_ARM64_PA_BITS_52
489alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 519 orr \ttbr, \phys, \phys, lsr #46
490 mrs \tmp0, ttbr0_el1 520 and \ttbr, \ttbr, #TTBR_BADDR_MASK_52
491 mov \tmp1, #FALKOR_RESERVED_ASID 521#else
492 bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR 522 mov \ttbr, \phys
493 msr ttbr0_el1, \tmp0
494 isb
495 bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR
496 msr ttbr0_el1, \tmp0
497 isb
498alternative_else_nop_endif
499#endif
500 .endm
501
502/*
503 * Errata workaround post TTBR0_EL1 update.
504 */
505 .macro post_ttbr0_update_workaround
506#ifdef CONFIG_CAVIUM_ERRATUM_27456
507alternative_if ARM64_WORKAROUND_CAVIUM_27456
508 ic iallu
509 dsb nsh
510 isb
511alternative_else_nop_endif
512#endif 523#endif
513 .endm 524 .endm
514 525
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 2ff7c5e8efab..bb263820de13 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -41,7 +41,11 @@
41#define ARM64_WORKAROUND_CAVIUM_30115 20 41#define ARM64_WORKAROUND_CAVIUM_30115 20
42#define ARM64_HAS_DCPOP 21 42#define ARM64_HAS_DCPOP 21
43#define ARM64_SVE 22 43#define ARM64_SVE 22
44#define ARM64_UNMAP_KERNEL_AT_EL0 23
45#define ARM64_HARDEN_BRANCH_PREDICTOR 24
46#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
47#define ARM64_HAS_RAS_EXTN 26
44 48
45#define ARM64_NCAPS 23 49#define ARM64_NCAPS 27
46 50
47#endif /* __ASM_CPUCAPS_H */ 51#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index cbf08d7cbf30..be7bd19c87ec 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -79,28 +79,37 @@
79#define ARM_CPU_PART_AEM_V8 0xD0F 79#define ARM_CPU_PART_AEM_V8 0xD0F
80#define ARM_CPU_PART_FOUNDATION 0xD00 80#define ARM_CPU_PART_FOUNDATION 0xD00
81#define ARM_CPU_PART_CORTEX_A57 0xD07 81#define ARM_CPU_PART_CORTEX_A57 0xD07
82#define ARM_CPU_PART_CORTEX_A72 0xD08
82#define ARM_CPU_PART_CORTEX_A53 0xD03 83#define ARM_CPU_PART_CORTEX_A53 0xD03
83#define ARM_CPU_PART_CORTEX_A73 0xD09 84#define ARM_CPU_PART_CORTEX_A73 0xD09
85#define ARM_CPU_PART_CORTEX_A75 0xD0A
84 86
85#define APM_CPU_PART_POTENZA 0x000 87#define APM_CPU_PART_POTENZA 0x000
86 88
87#define CAVIUM_CPU_PART_THUNDERX 0x0A1 89#define CAVIUM_CPU_PART_THUNDERX 0x0A1
88#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 90#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
89#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 91#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
92#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
90 93
91#define BRCM_CPU_PART_VULCAN 0x516 94#define BRCM_CPU_PART_VULCAN 0x516
92 95
93#define QCOM_CPU_PART_FALKOR_V1 0x800 96#define QCOM_CPU_PART_FALKOR_V1 0x800
94#define QCOM_CPU_PART_FALKOR 0xC00 97#define QCOM_CPU_PART_FALKOR 0xC00
98#define QCOM_CPU_PART_KRYO 0x200
95 99
96#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) 100#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
97#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) 101#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
102#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
98#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) 103#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
104#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
99#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) 105#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
100#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) 106#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
101#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) 107#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
108#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
109#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
102#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) 110#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
103#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) 111#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
112#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
104 113
105#ifndef __ASSEMBLY__ 114#ifndef __ASSEMBLY__
106 115
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index c4cd5081d78b..8389050328bb 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -121,19 +121,21 @@ static inline void efi_set_pgd(struct mm_struct *mm)
121 if (mm != current->active_mm) { 121 if (mm != current->active_mm) {
122 /* 122 /*
123 * Update the current thread's saved ttbr0 since it is 123 * Update the current thread's saved ttbr0 since it is
124 * restored as part of a return from exception. Set 124 * restored as part of a return from exception. Enable
125 * the hardware TTBR0_EL1 using cpu_switch_mm() 125 * access to the valid TTBR0_EL1 and invoke the errata
126 * directly to enable potential errata workarounds. 126 * workaround directly since there is no return from
127 * exception when invoking the EFI run-time services.
127 */ 128 */
128 update_saved_ttbr0(current, mm); 129 update_saved_ttbr0(current, mm);
129 cpu_switch_mm(mm->pgd, mm); 130 uaccess_ttbr0_enable();
131 post_ttbr_update_workaround();
130 } else { 132 } else {
131 /* 133 /*
132 * Defer the switch to the current thread's TTBR0_EL1 134 * Defer the switch to the current thread's TTBR0_EL1
133 * until uaccess_enable(). Restore the current 135 * until uaccess_enable(). Restore the current
134 * thread's saved ttbr0 corresponding to its active_mm 136 * thread's saved ttbr0 corresponding to its active_mm
135 */ 137 */
136 cpu_set_reserved_ttbr0(); 138 uaccess_ttbr0_disable();
137 update_saved_ttbr0(current, current->active_mm); 139 update_saved_ttbr0(current, current->active_mm);
138 } 140 }
139 } 141 }
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 014d7d8edcf9..803443d74926 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -86,6 +86,18 @@
86#define ESR_ELx_WNR_SHIFT (6) 86#define ESR_ELx_WNR_SHIFT (6)
87#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT) 87#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
88 88
89/* Asynchronous Error Type */
90#define ESR_ELx_IDS_SHIFT (24)
91#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT)
92#define ESR_ELx_AET_SHIFT (10)
93#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT)
94
95#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT)
96#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT)
97#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT)
98#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT)
99#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
100
89/* Shared ISS field definitions for Data/Instruction aborts */ 101/* Shared ISS field definitions for Data/Instruction aborts */
90#define ESR_ELx_SET_SHIFT (11) 102#define ESR_ELx_SET_SHIFT (11)
91#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) 103#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
@@ -100,6 +112,7 @@
100#define ESR_ELx_FSC (0x3F) 112#define ESR_ELx_FSC (0x3F)
101#define ESR_ELx_FSC_TYPE (0x3C) 113#define ESR_ELx_FSC_TYPE (0x3C)
102#define ESR_ELx_FSC_EXTABT (0x10) 114#define ESR_ELx_FSC_EXTABT (0x10)
115#define ESR_ELx_FSC_SERROR (0x11)
103#define ESR_ELx_FSC_ACCESS (0x08) 116#define ESR_ELx_FSC_ACCESS (0x08)
104#define ESR_ELx_FSC_FAULT (0x04) 117#define ESR_ELx_FSC_FAULT (0x04)
105#define ESR_ELx_FSC_PERM (0x0C) 118#define ESR_ELx_FSC_PERM (0x0C)
@@ -127,6 +140,13 @@
127#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) 140#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
128#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) 141#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
129 142
143#define DISR_EL1_IDS (UL(1) << 24)
144/*
145 * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean
146 * different things in the future...
147 */
148#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
149
130/* ESR value templates for specific events */ 150/* ESR value templates for specific events */
131 151
132/* BRK instruction trap from AArch64 state */ 152/* BRK instruction trap from AArch64 state */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 0c2eec490abf..bc30429d8e91 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -18,6 +18,8 @@
18#ifndef __ASM_EXCEPTION_H 18#ifndef __ASM_EXCEPTION_H
19#define __ASM_EXCEPTION_H 19#define __ASM_EXCEPTION_H
20 20
21#include <asm/esr.h>
22
21#include <linux/interrupt.h> 23#include <linux/interrupt.h>
22 24
23#define __exception __attribute__((section(".exception.text"))) 25#define __exception __attribute__((section(".exception.text")))
@@ -27,4 +29,16 @@
27#define __exception_irq_entry __exception 29#define __exception_irq_entry __exception
28#endif 30#endif
29 31
32static inline u32 disr_to_esr(u64 disr)
33{
34 unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
35
36 if ((disr & DISR_EL1_IDS) == 0)
37 esr |= (disr & DISR_EL1_ESR_MASK);
38 else
39 esr |= (disr & ESR_ELx_ISS_MASK);
40
41 return esr;
42}
43
30#endif /* __ASM_EXCEPTION_H */ 44#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 4052ec39e8db..ec1e6d6fa14c 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -58,6 +58,11 @@ enum fixed_addresses {
58 FIX_APEI_GHES_NMI, 58 FIX_APEI_GHES_NMI,
59#endif /* CONFIG_ACPI_APEI_GHES */ 59#endif /* CONFIG_ACPI_APEI_GHES */
60 60
61#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
62 FIX_ENTRY_TRAMP_DATA,
63 FIX_ENTRY_TRAMP_TEXT,
64#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
65#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
61 __end_of_permanent_fixed_addresses, 66 __end_of_permanent_fixed_addresses,
62 67
63 /* 68 /*
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 74f34392a531..8857a0f0d0f7 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -71,7 +71,7 @@ extern void fpsimd_flush_thread(void);
71extern void fpsimd_signal_preserve_current_state(void); 71extern void fpsimd_signal_preserve_current_state(void);
72extern void fpsimd_preserve_current_state(void); 72extern void fpsimd_preserve_current_state(void);
73extern void fpsimd_restore_current_state(void); 73extern void fpsimd_restore_current_state(void);
74extern void fpsimd_update_current_state(struct fpsimd_state *state); 74extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
75 75
76extern void fpsimd_flush_task_state(struct task_struct *target); 76extern void fpsimd_flush_task_state(struct task_struct *target);
77extern void sve_flush_cpu_state(void); 77extern void sve_flush_cpu_state(void);
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7803343e5881..82386e860dd2 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -52,7 +52,52 @@
52#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) 52#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
53#endif 53#endif
54 54
55#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) 55
56/*
57 * If KASLR is enabled, then an offset K is added to the kernel address
58 * space. The bottom 21 bits of this offset are zero to guarantee 2MB
59 * alignment for PA and VA.
60 *
61 * For each pagetable level of the swapper, we know that the shift will
62 * be larger than 21 (for the 4KB granule case we use section maps thus
63 * the smallest shift is actually 30) thus there is the possibility that
64 * KASLR can increase the number of pagetable entries by 1, so we make
65 * room for this extra entry.
66 *
67 * Note KASLR cannot increase the number of required entries for a level
68 * by more than one because it increments both the virtual start and end
69 * addresses equally (the extra entry comes from the case where the end
70 * address is just pushed over a boundary and the start address isn't).
71 */
72
73#ifdef CONFIG_RANDOMIZE_BASE
74#define EARLY_KASLR (1)
75#else
76#define EARLY_KASLR (0)
77#endif
78
79#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \
80 - ((vstart) >> (shift)) + 1 + EARLY_KASLR)
81
82#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT))
83
84#if SWAPPER_PGTABLE_LEVELS > 3
85#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT))
86#else
87#define EARLY_PUDS(vstart, vend) (0)
88#endif
89
90#if SWAPPER_PGTABLE_LEVELS > 2
91#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT))
92#else
93#define EARLY_PMDS(vstart, vend) (0)
94#endif
95
96#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \
97 + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \
98 + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
99 + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
100#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
56#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) 101#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
57 102
58#ifdef CONFIG_ARM64_SW_TTBR0_PAN 103#ifdef CONFIG_ARM64_SW_TTBR0_PAN
@@ -78,8 +123,16 @@
78/* 123/*
79 * Initial memory map attributes. 124 * Initial memory map attributes.
80 */ 125 */
81#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) 126#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
82#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) 127#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
128
129#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
130#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG)
131#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
132#else
133#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS
134#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS
135#endif
83 136
84#if ARM64_SWAPPER_USES_SECTION_MAPS 137#if ARM64_SWAPPER_USES_SECTION_MAPS
85#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) 138#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 715d395ef45b..b0c84171e6a3 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,8 @@
23#include <asm/types.h> 23#include <asm/types.h>
24 24
25/* Hyp Configuration Register (HCR) bits */ 25/* Hyp Configuration Register (HCR) bits */
26#define HCR_TEA (UL(1) << 37)
27#define HCR_TERR (UL(1) << 36)
26#define HCR_E2H (UL(1) << 34) 28#define HCR_E2H (UL(1) << 34)
27#define HCR_ID (UL(1) << 33) 29#define HCR_ID (UL(1) << 33)
28#define HCR_CD (UL(1) << 32) 30#define HCR_CD (UL(1) << 32)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ab4d0a926043..24961b732e65 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -68,6 +68,8 @@ extern u32 __kvm_get_mdcr_el2(void);
68 68
69extern u32 __init_stage2_translation(void); 69extern u32 __init_stage2_translation(void);
70 70
71extern void __qcom_hyp_sanitize_btac_predictors(void);
72
71#endif 73#endif
72 74
73#endif /* __ARM_KVM_ASM_H__ */ 75#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5f28dfa14cee..413dc82b1e89 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -50,6 +50,13 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
50 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; 50 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
51 if (is_kernel_in_hyp_mode()) 51 if (is_kernel_in_hyp_mode())
52 vcpu->arch.hcr_el2 |= HCR_E2H; 52 vcpu->arch.hcr_el2 |= HCR_E2H;
53 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
54 /* route synchronous external abort exceptions to EL2 */
55 vcpu->arch.hcr_el2 |= HCR_TEA;
56 /* trap error record accesses */
57 vcpu->arch.hcr_el2 |= HCR_TERR;
58 }
59
53 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) 60 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
54 vcpu->arch.hcr_el2 &= ~HCR_RW; 61 vcpu->arch.hcr_el2 &= ~HCR_RW;
55} 62}
@@ -64,6 +71,11 @@ static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
64 vcpu->arch.hcr_el2 = hcr; 71 vcpu->arch.hcr_el2 = hcr;
65} 72}
66 73
74static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
75{
76 vcpu->arch.vsesr_el2 = vsesr;
77}
78
67static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) 79static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
68{ 80{
69 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; 81 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
@@ -171,6 +183,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
171 return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; 183 return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
172} 184}
173 185
186static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
187{
188 return vcpu->arch.fault.disr_el1;
189}
190
174static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) 191static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
175{ 192{
176 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; 193 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ea6cb5b24258..4485ae8e98de 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/kvm_types.h> 26#include <linux/kvm_types.h>
27#include <asm/cpufeature.h> 27#include <asm/cpufeature.h>
28#include <asm/daifflags.h>
28#include <asm/fpsimd.h> 29#include <asm/fpsimd.h>
29#include <asm/kvm.h> 30#include <asm/kvm.h>
30#include <asm/kvm_asm.h> 31#include <asm/kvm_asm.h>
@@ -89,6 +90,7 @@ struct kvm_vcpu_fault_info {
89 u32 esr_el2; /* Hyp Syndrom Register */ 90 u32 esr_el2; /* Hyp Syndrom Register */
90 u64 far_el2; /* Hyp Fault Address Register */ 91 u64 far_el2; /* Hyp Fault Address Register */
91 u64 hpfar_el2; /* Hyp IPA Fault Address Register */ 92 u64 hpfar_el2; /* Hyp IPA Fault Address Register */
93 u64 disr_el1; /* Deferred [SError] Status Register */
92}; 94};
93 95
94/* 96/*
@@ -120,6 +122,7 @@ enum vcpu_sysreg {
120 PAR_EL1, /* Physical Address Register */ 122 PAR_EL1, /* Physical Address Register */
121 MDSCR_EL1, /* Monitor Debug System Control Register */ 123 MDSCR_EL1, /* Monitor Debug System Control Register */
122 MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ 124 MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
125 DISR_EL1, /* Deferred Interrupt Status Register */
123 126
124 /* Performance Monitors Registers */ 127 /* Performance Monitors Registers */
125 PMCR_EL0, /* Control Register */ 128 PMCR_EL0, /* Control Register */
@@ -192,6 +195,8 @@ struct kvm_cpu_context {
192 u64 sys_regs[NR_SYS_REGS]; 195 u64 sys_regs[NR_SYS_REGS];
193 u32 copro[NR_COPRO_REGS]; 196 u32 copro[NR_COPRO_REGS];
194 }; 197 };
198
199 struct kvm_vcpu *__hyp_running_vcpu;
195}; 200};
196 201
197typedef struct kvm_cpu_context kvm_cpu_context_t; 202typedef struct kvm_cpu_context kvm_cpu_context_t;
@@ -277,6 +282,9 @@ struct kvm_vcpu_arch {
277 282
278 /* Detect first run of a vcpu */ 283 /* Detect first run of a vcpu */
279 bool has_run_once; 284 bool has_run_once;
285
286 /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
287 u64 vsesr_el2;
280}; 288};
281 289
282#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) 290#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
@@ -340,6 +348,8 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
340 348
341int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, 349int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
342 int exception_index); 350 int exception_index);
351void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
352 int exception_index);
343 353
344int kvm_perf_init(void); 354int kvm_perf_init(void);
345int kvm_perf_teardown(void); 355int kvm_perf_teardown(void);
@@ -396,4 +406,13 @@ static inline void kvm_fpsimd_flush_cpu_state(void)
396 sve_flush_cpu_state(); 406 sve_flush_cpu_state();
397} 407}
398 408
409static inline void kvm_arm_vhe_guest_enter(void)
410{
411 local_daif_mask();
412}
413
414static inline void kvm_arm_vhe_guest_exit(void)
415{
416 local_daif_restore(DAIF_PROCCTX_NOIRQ);
417}
399#endif /* __ARM64_KVM_HOST_H__ */ 418#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 672c8684d5c2..72e279dbae5f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -273,15 +273,26 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
273 273
274static inline bool __kvm_cpu_uses_extended_idmap(void) 274static inline bool __kvm_cpu_uses_extended_idmap(void)
275{ 275{
276 return __cpu_uses_extended_idmap(); 276 return __cpu_uses_extended_idmap_level();
277} 277}
278 278
279static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
280{
281 return idmap_ptrs_per_pgd;
282}
283
284/*
285 * Can't use pgd_populate here, because the extended idmap adds an extra level
286 * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended
287 * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4.
288 */
279static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, 289static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
280 pgd_t *hyp_pgd, 290 pgd_t *hyp_pgd,
281 pgd_t *merged_hyp_pgd, 291 pgd_t *merged_hyp_pgd,
282 unsigned long hyp_idmap_start) 292 unsigned long hyp_idmap_start)
283{ 293{
284 int idmap_idx; 294 int idmap_idx;
295 u64 pgd_addr;
285 296
286 /* 297 /*
287 * Use the first entry to access the HYP mappings. It is 298 * Use the first entry to access the HYP mappings. It is
@@ -289,7 +300,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
289 * extended idmap. 300 * extended idmap.
290 */ 301 */
291 VM_BUG_ON(pgd_val(merged_hyp_pgd[0])); 302 VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
292 merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE); 303 pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd));
304 merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE);
293 305
294 /* 306 /*
295 * Create another extended level entry that points to the boot HYP map, 307 * Create another extended level entry that points to the boot HYP map,
@@ -299,7 +311,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
299 */ 311 */
300 idmap_idx = hyp_idmap_start >> VA_BITS; 312 idmap_idx = hyp_idmap_start >> VA_BITS;
301 VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); 313 VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
302 merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); 314 pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd));
315 merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE);
303} 316}
304 317
305static inline unsigned int kvm_get_vmid_bits(void) 318static inline unsigned int kvm_get_vmid_bits(void)
@@ -309,5 +322,45 @@ static inline unsigned int kvm_get_vmid_bits(void)
309 return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; 322 return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
310} 323}
311 324
325#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
326#include <asm/mmu.h>
327
328static inline void *kvm_get_hyp_vector(void)
329{
330 struct bp_hardening_data *data = arm64_get_bp_hardening_data();
331 void *vect = kvm_ksym_ref(__kvm_hyp_vector);
332
333 if (data->fn) {
334 vect = __bp_harden_hyp_vecs_start +
335 data->hyp_vectors_slot * SZ_2K;
336
337 if (!has_vhe())
338 vect = lm_alias(vect);
339 }
340
341 return vect;
342}
343
344static inline int kvm_map_vectors(void)
345{
346 return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
347 kvm_ksym_ref(__bp_harden_hyp_vecs_end),
348 PAGE_HYP_EXEC);
349}
350
351#else
352static inline void *kvm_get_hyp_vector(void)
353{
354 return kvm_ksym_ref(__kvm_hyp_vector);
355}
356
357static inline int kvm_map_vectors(void)
358{
359 return 0;
360}
361#endif
362
363#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
364
312#endif /* __ASSEMBLY__ */ 365#endif /* __ASSEMBLY__ */
313#endif /* __ARM64_KVM_MMU_H__ */ 366#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 0d34bf0a89c7..a050d4f3615d 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,6 +17,11 @@
17#define __ASM_MMU_H 17#define __ASM_MMU_H
18 18
19#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ 19#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */
20#define USER_ASID_BIT 48
21#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
22#define TTBR_ASID_MASK (UL(0xffff) << 48)
23
24#ifndef __ASSEMBLY__
20 25
21typedef struct { 26typedef struct {
22 atomic64_t id; 27 atomic64_t id;
@@ -31,6 +36,49 @@ typedef struct {
31 */ 36 */
32#define ASID(mm) ((mm)->context.id.counter & 0xffff) 37#define ASID(mm) ((mm)->context.id.counter & 0xffff)
33 38
39static inline bool arm64_kernel_unmapped_at_el0(void)
40{
41 return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
42 cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
43}
44
45typedef void (*bp_hardening_cb_t)(void);
46
47struct bp_hardening_data {
48 int hyp_vectors_slot;
49 bp_hardening_cb_t fn;
50};
51
52#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
53extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
54
55DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
56
57static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
58{
59 return this_cpu_ptr(&bp_hardening_data);
60}
61
62static inline void arm64_apply_bp_hardening(void)
63{
64 struct bp_hardening_data *d;
65
66 if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR))
67 return;
68
69 d = arm64_get_bp_hardening_data();
70 if (d->fn)
71 d->fn();
72}
73#else
74static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
75{
76 return NULL;
77}
78
79static inline void arm64_apply_bp_hardening(void) { }
80#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
81
34extern void paging_init(void); 82extern void paging_init(void);
35extern void bootmem_init(void); 83extern void bootmem_init(void);
36extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); 84extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
@@ -41,4 +89,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
41extern void *fixmap_remap_fdt(phys_addr_t dt_phys); 89extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
42extern void mark_linear_text_alias_ro(void); 90extern void mark_linear_text_alias_ro(void);
43 91
92#endif /* !__ASSEMBLY__ */
44#endif 93#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 9d155fa9a507..8d3331985d2e 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -19,8 +19,6 @@
19#ifndef __ASM_MMU_CONTEXT_H 19#ifndef __ASM_MMU_CONTEXT_H
20#define __ASM_MMU_CONTEXT_H 20#define __ASM_MMU_CONTEXT_H
21 21
22#define FALKOR_RESERVED_ASID 1
23
24#ifndef __ASSEMBLY__ 22#ifndef __ASSEMBLY__
25 23
26#include <linux/compiler.h> 24#include <linux/compiler.h>
@@ -51,23 +49,39 @@ static inline void contextidr_thread_switch(struct task_struct *next)
51 */ 49 */
52static inline void cpu_set_reserved_ttbr0(void) 50static inline void cpu_set_reserved_ttbr0(void)
53{ 51{
54 unsigned long ttbr = __pa_symbol(empty_zero_page); 52 unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page));
55 53
56 write_sysreg(ttbr, ttbr0_el1); 54 write_sysreg(ttbr, ttbr0_el1);
57 isb(); 55 isb();
58} 56}
59 57
58static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
59{
60 BUG_ON(pgd == swapper_pg_dir);
61 cpu_set_reserved_ttbr0();
62 cpu_do_switch_mm(virt_to_phys(pgd),mm);
63}
64
60/* 65/*
61 * TCR.T0SZ value to use when the ID map is active. Usually equals 66 * TCR.T0SZ value to use when the ID map is active. Usually equals
62 * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in 67 * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
63 * physical memory, in which case it will be smaller. 68 * physical memory, in which case it will be smaller.
64 */ 69 */
65extern u64 idmap_t0sz; 70extern u64 idmap_t0sz;
71extern u64 idmap_ptrs_per_pgd;
66 72
67static inline bool __cpu_uses_extended_idmap(void) 73static inline bool __cpu_uses_extended_idmap(void)
68{ 74{
69 return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) && 75 return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
70 unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); 76}
77
78/*
79 * True if the extended ID map requires an extra level of translation table
80 * to be configured.
81 */
82static inline bool __cpu_uses_extended_idmap_level(void)
83{
84 return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS;
71} 85}
72 86
73/* 87/*
@@ -170,7 +184,7 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
170 else 184 else
171 ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; 185 ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
172 186
173 task_thread_info(tsk)->ttbr0 = ttbr; 187 WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
174} 188}
175#else 189#else
176static inline void update_saved_ttbr0(struct task_struct *tsk, 190static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -225,6 +239,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
225#define activate_mm(prev,next) switch_mm(prev, next, current) 239#define activate_mm(prev,next) switch_mm(prev, next, current)
226 240
227void verify_cpu_asid_bits(void); 241void verify_cpu_asid_bits(void);
242void post_ttbr_update_workaround(void);
228 243
229#endif /* !__ASSEMBLY__ */ 244#endif /* !__ASSEMBLY__ */
230 245
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 3bd498e4de4c..43393208229e 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,11 +16,15 @@
16#ifndef __ASM_PERCPU_H 16#ifndef __ASM_PERCPU_H
17#define __ASM_PERCPU_H 17#define __ASM_PERCPU_H
18 18
19#include <asm/alternative.h>
19#include <asm/stack_pointer.h> 20#include <asm/stack_pointer.h>
20 21
21static inline void set_my_cpu_offset(unsigned long off) 22static inline void set_my_cpu_offset(unsigned long off)
22{ 23{
23 asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); 24 asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
25 "msr tpidr_el2, %0",
26 ARM64_HAS_VIRT_HOST_EXTN)
27 :: "r" (off) : "memory");
24} 28}
25 29
26static inline unsigned long __my_cpu_offset(void) 30static inline unsigned long __my_cpu_offset(void)
@@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
31 * We want to allow caching the value, so avoid using volatile and 35 * We want to allow caching the value, so avoid using volatile and
32 * instead use a fake stack read to hazard against barrier(). 36 * instead use a fake stack read to hazard against barrier().
33 */ 37 */
34 asm("mrs %0, tpidr_el1" : "=r" (off) : 38 asm(ALTERNATIVE("mrs %0, tpidr_el1",
39 "mrs %0, tpidr_el2",
40 ARM64_HAS_VIRT_HOST_EXTN)
41 : "=r" (off) :
35 "Q" (*(const unsigned long *)current_stack_pointer)); 42 "Q" (*(const unsigned long *)current_stack_pointer));
36 43
37 return off; 44 return off;
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 5ca6a573a701..e9d9f1b006ef 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -44,7 +44,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
44 44
45static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) 45static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
46{ 46{
47 set_pud(pud, __pud(pmd | prot)); 47 set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
48} 48}
49 49
50static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) 50static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -73,7 +73,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
73 73
74static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) 74static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
75{ 75{
76 set_pgd(pgdp, __pgd(pud | prot)); 76 set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
77} 77}
78 78
79static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) 79static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
@@ -129,7 +129,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
129static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, 129static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
130 pmdval_t prot) 130 pmdval_t prot)
131{ 131{
132 set_pmd(pmdp, __pmd(pte | prot)); 132 set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
133} 133}
134 134
135/* 135/*
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index eb0c2bd90de9..f42836da8723 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -16,6 +16,8 @@
16#ifndef __ASM_PGTABLE_HWDEF_H 16#ifndef __ASM_PGTABLE_HWDEF_H
17#define __ASM_PGTABLE_HWDEF_H 17#define __ASM_PGTABLE_HWDEF_H
18 18
19#include <asm/memory.h>
20
19/* 21/*
20 * Number of page-table levels required to address 'va_bits' wide 22 * Number of page-table levels required to address 'va_bits' wide
21 * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) 23 * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
@@ -116,9 +118,9 @@
116 * Level 1 descriptor (PUD). 118 * Level 1 descriptor (PUD).
117 */ 119 */
118#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) 120#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
119#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) 121#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
120#define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) 122#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
121#define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) 123#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
122 124
123/* 125/*
124 * Level 2 descriptor (PMD). 126 * Level 2 descriptor (PMD).
@@ -166,6 +168,14 @@
166#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ 168#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
167#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */ 169#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */
168 170
171#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
172#ifdef CONFIG_ARM64_PA_BITS_52
173#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
174#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
175#else
176#define PTE_ADDR_MASK PTE_ADDR_LOW
177#endif
178
169/* 179/*
170 * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). 180 * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
171 */ 181 */
@@ -196,7 +206,7 @@
196/* 206/*
197 * Highest possible physical address supported. 207 * Highest possible physical address supported.
198 */ 208 */
199#define PHYS_MASK_SHIFT (48) 209#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
200#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) 210#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
201 211
202/* 212/*
@@ -272,9 +282,23 @@
272#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) 282#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
273#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) 283#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
274 284
285#define TCR_IPS_SHIFT 32
286#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
287#define TCR_A1 (UL(1) << 22)
275#define TCR_ASID16 (UL(1) << 36) 288#define TCR_ASID16 (UL(1) << 36)
276#define TCR_TBI0 (UL(1) << 37) 289#define TCR_TBI0 (UL(1) << 37)
277#define TCR_HA (UL(1) << 39) 290#define TCR_HA (UL(1) << 39)
278#define TCR_HD (UL(1) << 40) 291#define TCR_HD (UL(1) << 40)
279 292
293/*
294 * TTBR.
295 */
296#ifdef CONFIG_ARM64_PA_BITS_52
297/*
298 * This should be GENMASK_ULL(47, 2).
299 * TTBR_ELx[1] is RES0 in this configuration.
300 */
301#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2)
302#endif
303
280#endif 304#endif
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 0a5635fb0ef9..22a926825e3f 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -34,8 +34,16 @@
34 34
35#include <asm/pgtable-types.h> 35#include <asm/pgtable-types.h>
36 36
37#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) 37#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
38#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) 38#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
39
40#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
41#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG)
42#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG)
43#else
44#define PROT_DEFAULT _PROT_DEFAULT
45#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT
46#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
39 47
40#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) 48#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
41#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) 49#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -48,6 +56,7 @@
48#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) 56#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
49 57
50#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) 58#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
59#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG)
51 60
52#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) 61#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
53#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) 62#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
@@ -55,15 +64,15 @@
55#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) 64#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
56#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) 65#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
57 66
58#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) 67#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
59#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) 68#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
60#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) 69#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
61#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) 70#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
62 71
63#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) 72#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
64#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) 73#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
65 74
66#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) 75#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
67#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) 76#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
68#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) 77#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
69#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) 78#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bdcc7f1c9d06..89167c43ebb5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -59,9 +59,22 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
59 59
60#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) 60#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
61 61
62#define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT) 62/*
63 * Macros to convert between a physical address and its placement in a
64 * page table entry, taking care of 52-bit addresses.
65 */
66#ifdef CONFIG_ARM64_PA_BITS_52
67#define __pte_to_phys(pte) \
68 ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36))
69#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK)
70#else
71#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
72#define __phys_to_pte_val(phys) (phys)
73#endif
63 74
64#define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) 75#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
76#define pfn_pte(pfn,prot) \
77 __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
65 78
66#define pte_none(pte) (!pte_val(pte)) 79#define pte_none(pte) (!pte_val(pte))
67#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) 80#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
@@ -292,6 +305,11 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
292 305
293#define __HAVE_ARCH_PTE_SPECIAL 306#define __HAVE_ARCH_PTE_SPECIAL
294 307
308static inline pte_t pgd_pte(pgd_t pgd)
309{
310 return __pte(pgd_val(pgd));
311}
312
295static inline pte_t pud_pte(pud_t pud) 313static inline pte_t pud_pte(pud_t pud)
296{ 314{
297 return __pte(pud_val(pud)); 315 return __pte(pud_val(pud));
@@ -357,15 +375,24 @@ static inline int pmd_protnone(pmd_t pmd)
357 375
358#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) 376#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
359 377
360#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) 378#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
361#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) 379#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
380#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
381#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
362#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) 382#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
363 383
364#define pud_write(pud) pte_write(pud_pte(pud)) 384#define pud_write(pud) pte_write(pud_pte(pud))
365#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) 385
386#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
387#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
388#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
389#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
366 390
367#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) 391#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
368 392
393#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd))
394#define __phys_to_pgd_val(phys) __phys_to_pte_val(phys)
395
369#define __pgprot_modify(prot,mask,bits) \ 396#define __pgprot_modify(prot,mask,bits) \
370 __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) 397 __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
371 398
@@ -416,7 +443,7 @@ static inline void pmd_clear(pmd_t *pmdp)
416 443
417static inline phys_addr_t pmd_page_paddr(pmd_t pmd) 444static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
418{ 445{
419 return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; 446 return __pmd_to_phys(pmd);
420} 447}
421 448
422/* Find an entry in the third-level page table. */ 449/* Find an entry in the third-level page table. */
@@ -434,7 +461,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
434#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) 461#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
435#define pte_clear_fixmap() clear_fixmap(FIX_PTE) 462#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
436 463
437#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) 464#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))
438 465
439/* use ONLY for statically allocated translation tables */ 466/* use ONLY for statically allocated translation tables */
440#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) 467#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
@@ -467,7 +494,7 @@ static inline void pud_clear(pud_t *pudp)
467 494
468static inline phys_addr_t pud_page_paddr(pud_t pud) 495static inline phys_addr_t pud_page_paddr(pud_t pud)
469{ 496{
470 return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; 497 return __pud_to_phys(pud);
471} 498}
472 499
473/* Find an entry in the second-level page table. */ 500/* Find an entry in the second-level page table. */
@@ -480,7 +507,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
480#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) 507#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
481#define pmd_clear_fixmap() clear_fixmap(FIX_PMD) 508#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
482 509
483#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) 510#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
484 511
485/* use ONLY for statically allocated translation tables */ 512/* use ONLY for statically allocated translation tables */
486#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) 513#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@ -519,7 +546,7 @@ static inline void pgd_clear(pgd_t *pgdp)
519 546
520static inline phys_addr_t pgd_page_paddr(pgd_t pgd) 547static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
521{ 548{
522 return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; 549 return __pgd_to_phys(pgd);
523} 550}
524 551
525/* Find an entry in the first-level page table. */ 552
@@ -532,7 +559,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
532#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) 559#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr))
533#define pud_clear_fixmap() clear_fixmap(FIX_PUD) 560#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
534 561
535#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) 562#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
536 563
537/* use ONLY for statically allocated translation tables */ 564/* use ONLY for statically allocated translation tables */
538#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) 565#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
@@ -682,7 +709,9 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
682#endif 709#endif
683 710
684extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; 711extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
712extern pgd_t swapper_pg_end[];
685extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; 713extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
714extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
686 715
687/* 716/*
688 * Encode and decode a swap entry: 717 * Encode and decode a swap entry:
@@ -736,6 +765,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
736#define kc_vaddr_to_offset(v) ((v) & ~VA_START) 765#define kc_vaddr_to_offset(v) ((v) & ~VA_START)
737#define kc_offset_to_vaddr(o) ((o) | VA_START) 766#define kc_offset_to_vaddr(o) ((o) | VA_START)
738 767
768#ifdef CONFIG_ARM64_PA_BITS_52
769#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
770#else
771#define phys_to_ttbr(addr) (addr)
772#endif
773
739#endif /* !__ASSEMBLY__ */ 774#endif /* !__ASSEMBLY__ */
740 775
741#endif /* __ASM_PGTABLE_H */ 776#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 14ad6e4e87d1..16cef2e8449e 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
35 35
36#include <asm/memory.h> 36#include <asm/memory.h>
37 37
38#define cpu_switch_mm(pgd,mm) \
39do { \
40 BUG_ON(pgd == swapper_pg_dir); \
41 cpu_do_switch_mm(virt_to_phys(pgd),mm); \
42} while (0)
43
44#endif /* __ASSEMBLY__ */ 38#endif /* __ASSEMBLY__ */
45#endif /* __KERNEL__ */ 39#endif /* __KERNEL__ */
46#endif /* __ASM_PROCFNS_H */ 40#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 023cacb946c3..cee4ae25a5d1 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -216,6 +216,7 @@ static inline void spin_lock_prefetch(const void *ptr)
216 216
217int cpu_enable_pan(void *__unused); 217int cpu_enable_pan(void *__unused);
218int cpu_enable_cache_maint_trap(void *__unused); 218int cpu_enable_cache_maint_trap(void *__unused);
219int cpu_clear_disr(void *__unused);
219 220
220/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ 221/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
221#define SVE_SET_VL(arg) sve_set_current_vl(arg) 222#define SVE_SET_VL(arg) sve_set_current_vl(arg)
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
new file mode 100644
index 000000000000..e073e6886685
--- /dev/null
+++ b/arch/arm64/include/asm/sdei.h
@@ -0,0 +1,57 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2017 Arm Ltd.
3#ifndef __ASM_SDEI_H
4#define __ASM_SDEI_H
5
6/* Values for sdei_exit_mode */
7#define SDEI_EXIT_HVC 0
8#define SDEI_EXIT_SMC 1
9
10#define SDEI_STACK_SIZE IRQ_STACK_SIZE
11
12#ifndef __ASSEMBLY__
13
14#include <linux/linkage.h>
15#include <linux/preempt.h>
16#include <linux/types.h>
17
18#include <asm/virt.h>
19
20extern unsigned long sdei_exit_mode;
21
22/* Software Delegated Exception entry point from firmware */
23asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg,
24 unsigned long pc, unsigned long pstate);
25
26/* and its CONFIG_UNMAP_KERNEL_AT_EL0 trampoline */
27asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
28 unsigned long arg,
29 unsigned long pc,
30 unsigned long pstate);
31
32/*
33 * The above entry point does the minimum to call C code. This function does
34 * anything else, before calling the driver.
35 */
36struct sdei_registered_event;
37asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
38 struct sdei_registered_event *arg);
39
40unsigned long sdei_arch_get_entry_point(int conduit);
41#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
42
43bool _on_sdei_stack(unsigned long sp);
44static inline bool on_sdei_stack(unsigned long sp)
45{
46 if (!IS_ENABLED(CONFIG_VMAP_STACK))
47 return false;
48 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
49 return false;
50 if (in_nmi())
51 return _on_sdei_stack(sp);
52
53 return false;
54}
55
56#endif /* __ASSEMBLY__ */
57#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 941267caa39c..caab039d6305 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -28,5 +28,6 @@ extern char __initdata_begin[], __initdata_end[];
28extern char __inittext_begin[], __inittext_end[]; 28extern char __inittext_begin[], __inittext_end[];
29extern char __irqentry_text_start[], __irqentry_text_end[]; 29extern char __irqentry_text_start[], __irqentry_text_end[];
30extern char __mmuoff_data_start[], __mmuoff_data_end[]; 30extern char __mmuoff_data_start[], __mmuoff_data_end[];
31extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
31 32
32#endif /* __ASM_SECTIONS_H */ 33#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 74a9d301819f..b299929fe56c 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -17,7 +17,7 @@
17#define __ASM_SPARSEMEM_H 17#define __ASM_SPARSEMEM_H
18 18
19#ifdef CONFIG_SPARSEMEM 19#ifdef CONFIG_SPARSEMEM
20#define MAX_PHYSMEM_BITS 48 20#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
21#define SECTION_SIZE_BITS 30 21#define SECTION_SIZE_BITS 30
22#endif 22#endif
23 23
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 6ad30776e984..472ef944e932 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -22,6 +22,7 @@
22 22
23#include <asm/memory.h> 23#include <asm/memory.h>
24#include <asm/ptrace.h> 24#include <asm/ptrace.h>
25#include <asm/sdei.h>
25 26
26struct stackframe { 27struct stackframe {
27 unsigned long fp; 28 unsigned long fp;
@@ -85,6 +86,8 @@ static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp
85 return true; 86 return true;
86 if (on_overflow_stack(sp)) 87 if (on_overflow_stack(sp))
87 return true; 88 return true;
89 if (on_sdei_stack(sp))
90 return true;
88 91
89 return false; 92 return false;
90} 93}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 08cc88574659..0e1960c59197 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -20,6 +20,7 @@
20#ifndef __ASM_SYSREG_H 20#ifndef __ASM_SYSREG_H
21#define __ASM_SYSREG_H 21#define __ASM_SYSREG_H
22 22
23#include <asm/compiler.h>
23#include <linux/stringify.h> 24#include <linux/stringify.h>
24 25
25/* 26/*
@@ -175,6 +176,16 @@
175#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) 176#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0)
176#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) 177#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1)
177#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0) 178#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0)
179
180#define SYS_ERRIDR_EL1 sys_reg(3, 0, 5, 3, 0)
181#define SYS_ERRSELR_EL1 sys_reg(3, 0, 5, 3, 1)
182#define SYS_ERXFR_EL1 sys_reg(3, 0, 5, 4, 0)
183#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
184#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
185#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
186#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
187#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
188
178#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) 189#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
179#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) 190#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
180 191
@@ -278,6 +289,7 @@
278#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) 289#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
279 290
280#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) 291#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
292#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
281 293
282#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) 294#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0)
283#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) 295#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1)
@@ -353,8 +365,10 @@
353 365
354#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) 366#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
355#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) 367#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
368#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
356#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) 369#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
357 370
371#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
358#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) 372#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
359#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) 373#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
360#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) 374#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
@@ -398,27 +412,85 @@
398 412
399/* Common SCTLR_ELx flags. */ 413/* Common SCTLR_ELx flags. */
400#define SCTLR_ELx_EE (1 << 25) 414#define SCTLR_ELx_EE (1 << 25)
415#define SCTLR_ELx_IESB (1 << 21)
416#define SCTLR_ELx_WXN (1 << 19)
401#define SCTLR_ELx_I (1 << 12) 417#define SCTLR_ELx_I (1 << 12)
402#define SCTLR_ELx_SA (1 << 3) 418#define SCTLR_ELx_SA (1 << 3)
403#define SCTLR_ELx_C (1 << 2) 419#define SCTLR_ELx_C (1 << 2)
404#define SCTLR_ELx_A (1 << 1) 420#define SCTLR_ELx_A (1 << 1)
405#define SCTLR_ELx_M 1 421#define SCTLR_ELx_M 1
406 422
423#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
424 SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
425
426/* SCTLR_EL2 specific flags. */
407#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ 427#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
408 (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ 428 (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
409 (1 << 29)) 429 (1 << 29))
430#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \
431 (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \
432 (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \
433 (1 << 27) | (1 << 30) | (1 << 31))
434
435#ifdef CONFIG_CPU_BIG_ENDIAN
436#define ENDIAN_SET_EL2 SCTLR_ELx_EE
437#define ENDIAN_CLEAR_EL2 0
438#else
439#define ENDIAN_SET_EL2 0
440#define ENDIAN_CLEAR_EL2 SCTLR_ELx_EE
441#endif
442
443/* SCTLR_EL2 value used for the hyp-stub */
444#define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
445#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
446 SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \
447 ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
448
449/* Check all the bits are accounted for */
450#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0)
410 451
411#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
412 SCTLR_ELx_SA | SCTLR_ELx_I)
413 452
414/* SCTLR_EL1 specific flags. */ 453/* SCTLR_EL1 specific flags. */
415#define SCTLR_EL1_UCI (1 << 26) 454#define SCTLR_EL1_UCI (1 << 26)
455#define SCTLR_EL1_E0E (1 << 24)
416#define SCTLR_EL1_SPAN (1 << 23) 456#define SCTLR_EL1_SPAN (1 << 23)
457#define SCTLR_EL1_NTWE (1 << 18)
458#define SCTLR_EL1_NTWI (1 << 16)
417#define SCTLR_EL1_UCT (1 << 15) 459#define SCTLR_EL1_UCT (1 << 15)
460#define SCTLR_EL1_DZE (1 << 14)
461#define SCTLR_EL1_UMA (1 << 9)
418#define SCTLR_EL1_SED (1 << 8) 462#define SCTLR_EL1_SED (1 << 8)
463#define SCTLR_EL1_ITD (1 << 7)
419#define SCTLR_EL1_CP15BEN (1 << 5) 464#define SCTLR_EL1_CP15BEN (1 << 5)
465#define SCTLR_EL1_SA0 (1 << 4)
466
467#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \
468 (1 << 29))
469#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \
470 (1 << 27) | (1 << 30) | (1 << 31))
471
472#ifdef CONFIG_CPU_BIG_ENDIAN
473#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
474#define ENDIAN_CLEAR_EL1 0
475#else
476#define ENDIAN_SET_EL1 0
477#define ENDIAN_CLEAR_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
478#endif
479
480#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\
481 SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\
482 SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\
483 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
484 ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
485#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\
486 SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\
487 SCTLR_EL1_RES0)
488
489/* Check all the bits are accounted for */
490#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0)
420 491
421/* id_aa64isar0 */ 492/* id_aa64isar0 */
493#define ID_AA64ISAR0_FHM_SHIFT 48
422#define ID_AA64ISAR0_DP_SHIFT 44 494#define ID_AA64ISAR0_DP_SHIFT 44
423#define ID_AA64ISAR0_SM4_SHIFT 40 495#define ID_AA64ISAR0_SM4_SHIFT 40
424#define ID_AA64ISAR0_SM3_SHIFT 36 496#define ID_AA64ISAR0_SM3_SHIFT 36
@@ -437,7 +509,10 @@
437#define ID_AA64ISAR1_DPB_SHIFT 0 509#define ID_AA64ISAR1_DPB_SHIFT 0
438 510
439/* id_aa64pfr0 */ 511/* id_aa64pfr0 */
512#define ID_AA64PFR0_CSV3_SHIFT 60
513#define ID_AA64PFR0_CSV2_SHIFT 56
440#define ID_AA64PFR0_SVE_SHIFT 32 514#define ID_AA64PFR0_SVE_SHIFT 32
515#define ID_AA64PFR0_RAS_SHIFT 28
441#define ID_AA64PFR0_GIC_SHIFT 24 516#define ID_AA64PFR0_GIC_SHIFT 24
442#define ID_AA64PFR0_ASIMD_SHIFT 20 517#define ID_AA64PFR0_ASIMD_SHIFT 20
443#define ID_AA64PFR0_FP_SHIFT 16 518#define ID_AA64PFR0_FP_SHIFT 16
@@ -447,6 +522,7 @@
447#define ID_AA64PFR0_EL0_SHIFT 0 522#define ID_AA64PFR0_EL0_SHIFT 0
448 523
449#define ID_AA64PFR0_SVE 0x1 524#define ID_AA64PFR0_SVE 0x1
525#define ID_AA64PFR0_RAS_V1 0x1
450#define ID_AA64PFR0_FP_NI 0xf 526#define ID_AA64PFR0_FP_NI 0xf
451#define ID_AA64PFR0_FP_SUPPORTED 0x0 527#define ID_AA64PFR0_FP_SUPPORTED 0x0
452#define ID_AA64PFR0_ASIMD_NI 0xf 528#define ID_AA64PFR0_ASIMD_NI 0xf
@@ -471,6 +547,14 @@
471#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 547#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
472#define ID_AA64MMFR0_TGRAN16_NI 0x0 548#define ID_AA64MMFR0_TGRAN16_NI 0x0
473#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 549#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
550#define ID_AA64MMFR0_PARANGE_48 0x5
551#define ID_AA64MMFR0_PARANGE_52 0x6
552
553#ifdef CONFIG_ARM64_PA_BITS_52
554#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
555#else
556#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
557#endif
474 558
475/* id_aa64mmfr1 */ 559/* id_aa64mmfr1 */
476#define ID_AA64MMFR1_PAN_SHIFT 20 560#define ID_AA64MMFR1_PAN_SHIFT 20
@@ -582,6 +666,7 @@
582 666
583#else 667#else
584 668
669#include <linux/build_bug.h>
585#include <linux/types.h> 670#include <linux/types.h>
586 671
587asm( 672asm(
@@ -638,6 +723,9 @@ static inline void config_sctlr_el1(u32 clear, u32 set)
638{ 723{
639 u32 val; 724 u32 val;
640 725
726 SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS;
727 SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS;
728
641 val = read_sysreg(sctlr_el1); 729 val = read_sysreg(sctlr_el1);
642 val &= ~clear; 730 val &= ~clear;
643 val |= set; 731 val |= set;
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index af1c76981911..9e82dd79c7db 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -23,6 +23,7 @@
23 23
24#include <linux/sched.h> 24#include <linux/sched.h>
25#include <asm/cputype.h> 25#include <asm/cputype.h>
26#include <asm/mmu.h>
26 27
27/* 28/*
28 * Raw TLBI operations. 29 * Raw TLBI operations.
@@ -54,6 +55,11 @@
54 55
55#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) 56#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
56 57
58#define __tlbi_user(op, arg) do { \
59 if (arm64_kernel_unmapped_at_el0()) \
60 __tlbi(op, (arg) | USER_ASID_FLAG); \
61} while (0)
62
57/* 63/*
58 * TLB Management 64 * TLB Management
59 * ============== 65 * ==============
@@ -115,6 +121,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
115 121
116 dsb(ishst); 122 dsb(ishst);
117 __tlbi(aside1is, asid); 123 __tlbi(aside1is, asid);
124 __tlbi_user(aside1is, asid);
118 dsb(ish); 125 dsb(ish);
119} 126}
120 127
@@ -125,6 +132,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
125 132
126 dsb(ishst); 133 dsb(ishst);
127 __tlbi(vale1is, addr); 134 __tlbi(vale1is, addr);
135 __tlbi_user(vale1is, addr);
128 dsb(ish); 136 dsb(ish);
129} 137}
130 138
@@ -151,10 +159,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
151 159
152 dsb(ishst); 160 dsb(ishst);
153 for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { 161 for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
154 if (last_level) 162 if (last_level) {
155 __tlbi(vale1is, addr); 163 __tlbi(vale1is, addr);
156 else 164 __tlbi_user(vale1is, addr);
165 } else {
157 __tlbi(vae1is, addr); 166 __tlbi(vae1is, addr);
167 __tlbi_user(vae1is, addr);
168 }
158 } 169 }
159 dsb(ish); 170 dsb(ish);
160} 171}
@@ -194,6 +205,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
194 unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); 205 unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
195 206
196 __tlbi(vae1is, addr); 207 __tlbi(vae1is, addr);
208 __tlbi_user(vae1is, addr);
197 dsb(ish); 209 dsb(ish);
198} 210}
199 211
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 1696f9de9359..178e338d2889 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -19,6 +19,7 @@
19#define __ASM_TRAP_H 19#define __ASM_TRAP_H
20 20
21#include <linux/list.h> 21#include <linux/list.h>
22#include <asm/esr.h>
22#include <asm/sections.h> 23#include <asm/sections.h>
23 24
24struct pt_regs; 25struct pt_regs;
@@ -66,4 +67,57 @@ static inline int in_entry_text(unsigned long ptr)
66 return ptr >= (unsigned long)&__entry_text_start && 67 return ptr >= (unsigned long)&__entry_text_start &&
67 ptr < (unsigned long)&__entry_text_end; 68 ptr < (unsigned long)&__entry_text_end;
68} 69}
70
71/*
72 * CPUs with the RAS extensions have an Implementation-Defined-Syndrome bit
73 * to indicate whether this ESR has a RAS encoding. CPUs without this feature
74 * have a ISS-Valid bit in the same position.
75 * If this bit is set, we know its not a RAS SError.
76 * If its clear, we need to know if the CPU supports RAS. Uncategorized RAS
77 * errors share the same encoding as an all-zeros encoding from a CPU that
78 * doesn't support RAS.
79 */
80static inline bool arm64_is_ras_serror(u32 esr)
81{
82 WARN_ON(preemptible());
83
84 if (esr & ESR_ELx_IDS)
85 return false;
86
87 if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN))
88 return true;
89 else
90 return false;
91}
92
93/*
94 * Return the AET bits from a RAS SError's ESR.
95 *
96 * It is implementation defined whether Uncategorized errors are containable.
97 * We treat them as Uncontainable.
98 * Non-RAS SError's are reported as Uncontained/Uncategorized.
99 */
100static inline u32 arm64_ras_serror_get_severity(u32 esr)
101{
102 u32 aet = esr & ESR_ELx_AET;
103
104 if (!arm64_is_ras_serror(esr)) {
105 /* Not a RAS error, we can't interpret the ESR. */
106 return ESR_ELx_AET_UC;
107 }
108
109 /*
110 * AET is RES0 if 'the value returned in the DFSC field is not
111 * [ESR_ELx_FSC_SERROR]'
112 */
113 if ((esr & ESR_ELx_FSC) != ESR_ELx_FSC_SERROR) {
114 /* No severity information : Uncategorized */
115 return ESR_ELx_AET_UC;
116 }
117
118 return aet;
119}
120
121bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr);
122void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
69#endif 123#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index fc0f9eb66039..59fda5292936 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -105,17 +105,23 @@ static inline void set_fs(mm_segment_t fs)
105#ifdef CONFIG_ARM64_SW_TTBR0_PAN 105#ifdef CONFIG_ARM64_SW_TTBR0_PAN
106static inline void __uaccess_ttbr0_disable(void) 106static inline void __uaccess_ttbr0_disable(void)
107{ 107{
108 unsigned long ttbr; 108 unsigned long flags, ttbr;
109 109
110 /* reserved_ttbr0 placed at the end of swapper_pg_dir */ 110 local_irq_save(flags);
111 ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; 111 ttbr = read_sysreg(ttbr1_el1);
112 write_sysreg(ttbr, ttbr0_el1); 112 ttbr &= ~TTBR_ASID_MASK;
113 /* reserved_ttbr0 placed before swapper_pg_dir */
114 write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1);
115 isb();
116 /* Set reserved ASID */
117 write_sysreg(ttbr, ttbr1_el1);
113 isb(); 118 isb();
119 local_irq_restore(flags);
114} 120}
115 121
116static inline void __uaccess_ttbr0_enable(void) 122static inline void __uaccess_ttbr0_enable(void)
117{ 123{
118 unsigned long flags; 124 unsigned long flags, ttbr0, ttbr1;
119 125
120 /* 126 /*
121 * Disable interrupts to avoid preemption between reading the 'ttbr0' 127 * Disable interrupts to avoid preemption between reading the 'ttbr0'
@@ -123,7 +129,17 @@ static inline void __uaccess_ttbr0_enable(void)
123 * roll-over and an update of 'ttbr0'. 129 * roll-over and an update of 'ttbr0'.
124 */ 130 */
125 local_irq_save(flags); 131 local_irq_save(flags);
126 write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); 132 ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
133
134 /* Restore active ASID */
135 ttbr1 = read_sysreg(ttbr1_el1);
136 ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
137 ttbr1 |= ttbr0 & TTBR_ASID_MASK;
138 write_sysreg(ttbr1, ttbr1_el1);
139 isb();
140
141 /* Restore user page table */
142 write_sysreg(ttbr0, ttbr0_el1);
127 isb(); 143 isb();
128 local_irq_restore(flags); 144 local_irq_restore(flags);
129} 145}
@@ -155,6 +171,18 @@ static inline bool uaccess_ttbr0_enable(void)
155} 171}
156#endif 172#endif
157 173
174static inline void __uaccess_disable_hw_pan(void)
175{
176 asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,
177 CONFIG_ARM64_PAN));
178}
179
180static inline void __uaccess_enable_hw_pan(void)
181{
182 asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
183 CONFIG_ARM64_PAN));
184}
185
158#define __uaccess_disable(alt) \ 186#define __uaccess_disable(alt) \
159do { \ 187do { \
160 if (!uaccess_ttbr0_disable()) \ 188 if (!uaccess_ttbr0_disable()) \
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
new file mode 100644
index 000000000000..0b5ec6e08c10
--- /dev/null
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -0,0 +1,28 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2017 Arm Ltd.
3#ifndef __ASM_VMAP_STACK_H
4#define __ASM_VMAP_STACK_H
5
6#include <linux/bug.h>
7#include <linux/gfp.h>
8#include <linux/kconfig.h>
9#include <linux/vmalloc.h>
10#include <asm/memory.h>
11#include <asm/pgtable.h>
12#include <asm/thread_info.h>
13
14/*
15 * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
16 * stacks need to have the same alignment.
17 */
18static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
19{
20 BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
21
22 return __vmalloc_node_range(stack_size, THREAD_ALIGN,
23 VMALLOC_START, VMALLOC_END,
24 THREADINFO_GFP, PAGE_KERNEL, 0, node,
25 __builtin_return_address(0));
26}
27
28#endif /* __ASM_VMAP_STACK_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index cda76fa8b9b2..f018c3deea3b 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -43,5 +43,6 @@
43#define HWCAP_ASIMDDP (1 << 20) 43#define HWCAP_ASIMDDP (1 << 20)
44#define HWCAP_SHA512 (1 << 21) 44#define HWCAP_SHA512 (1 << 21)
45#define HWCAP_SVE (1 << 22) 45#define HWCAP_SVE (1 << 22)
46#define HWCAP_ASIMDFHM (1 << 23)
46 47
47#endif /* _UAPI__ASM_HWCAP_H */ 48#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 067baace74a0..b87541360f43 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -52,6 +52,11 @@ arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
52arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o 52arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
53arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o 53arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
54arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 54arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
55arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
56
57ifeq ($(CONFIG_KVM),y)
58arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
59endif
55 60
56obj-y += $(arm64-obj-y) vdso/ probes/ 61obj-y += $(arm64-obj-y) vdso/ probes/
57obj-m += $(arm64-obj-m) 62obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index b3162715ed78..252396a96c78 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -117,7 +117,7 @@ bool __init acpi_psci_present(void)
117} 117}
118 118
119/* Whether HVC must be used instead of SMC as the PSCI conduit */ 119/* Whether HVC must be used instead of SMC as the PSCI conduit */
120bool __init acpi_psci_use_hvc(void) 120bool acpi_psci_use_hvc(void)
121{ 121{
122 return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC; 122 return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;
123} 123}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 6dd0a3a3e5c9..414288a558c8 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -32,6 +32,8 @@
32#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) 32#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
33#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) 33#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
34 34
35int alternatives_applied;
36
35struct alt_region { 37struct alt_region {
36 struct alt_instr *begin; 38 struct alt_instr *begin;
37 struct alt_instr *end; 39 struct alt_instr *end;
@@ -143,7 +145,6 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
143 */ 145 */
144static int __apply_alternatives_multi_stop(void *unused) 146static int __apply_alternatives_multi_stop(void *unused)
145{ 147{
146 static int patched = 0;
147 struct alt_region region = { 148 struct alt_region region = {
148 .begin = (struct alt_instr *)__alt_instructions, 149 .begin = (struct alt_instr *)__alt_instructions,
149 .end = (struct alt_instr *)__alt_instructions_end, 150 .end = (struct alt_instr *)__alt_instructions_end,
@@ -151,14 +152,14 @@ static int __apply_alternatives_multi_stop(void *unused)
151 152
152 /* We always have a CPU 0 at this point (__init) */ 153 /* We always have a CPU 0 at this point (__init) */
153 if (smp_processor_id()) { 154 if (smp_processor_id()) {
154 while (!READ_ONCE(patched)) 155 while (!READ_ONCE(alternatives_applied))
155 cpu_relax(); 156 cpu_relax();
156 isb(); 157 isb();
157 } else { 158 } else {
158 BUG_ON(patched); 159 BUG_ON(alternatives_applied);
159 __apply_alternatives(&region, true); 160 __apply_alternatives(&region, true);
160 /* Barriers provided by the cache flushing */ 161 /* Barriers provided by the cache flushing */
161 WRITE_ONCE(patched, 1); 162 WRITE_ONCE(alternatives_applied, 1);
162 } 163 }
163 164
164 return 0; 165 return 0;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088f1e4b..1303e04110cd 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,12 +18,14 @@
18 * along with this program. If not, see <http://www.gnu.org/licenses/>. 18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */ 19 */
20 20
21#include <linux/arm_sdei.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
22#include <linux/mm.h> 23#include <linux/mm.h>
23#include <linux/dma-mapping.h> 24#include <linux/dma-mapping.h>
24#include <linux/kvm_host.h> 25#include <linux/kvm_host.h>
25#include <linux/suspend.h> 26#include <linux/suspend.h>
26#include <asm/cpufeature.h> 27#include <asm/cpufeature.h>
28#include <asm/fixmap.h>
27#include <asm/thread_info.h> 29#include <asm/thread_info.h>
28#include <asm/memory.h> 30#include <asm/memory.h>
29#include <asm/smp_plat.h> 31#include <asm/smp_plat.h>
@@ -130,6 +132,7 @@ int main(void)
130 BLANK(); 132 BLANK();
131#ifdef CONFIG_KVM_ARM_HOST 133#ifdef CONFIG_KVM_ARM_HOST
132 DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); 134 DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
135 DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
133 DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); 136 DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
134 DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); 137 DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
135 DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); 138 DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
@@ -148,11 +151,18 @@ int main(void)
148 DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); 151 DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
149 DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); 152 DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
150 DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); 153 DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
151
152 BLANK(); 154 BLANK();
153 DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); 155 DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
154 DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); 156 DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
155 DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); 157 DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
156 DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); 158 DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
159 BLANK();
160#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
161 DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
162#endif
163#ifdef CONFIG_ARM_SDE_INTERFACE
164 DEFINE(SDEI_EVENT_INTREGS, offsetof(struct sdei_registered_event, interrupted_regs));
165 DEFINE(SDEI_EVENT_PRIORITY, offsetof(struct sdei_registered_event, priority));
166#endif
157 return 0; 167 return 0;
158} 168}
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
new file mode 100644
index 000000000000..76225c2611ea
--- /dev/null
+++ b/arch/arm64/kernel/bpi.S
@@ -0,0 +1,87 @@
1/*
2 * Contains CPU specific branch predictor invalidation sequences
3 *
4 * Copyright (C) 2018 ARM Ltd.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/linkage.h>
20
21.macro ventry target
22 .rept 31
23 nop
24 .endr
25 b \target
26.endm
27
28.macro vectors target
29 ventry \target + 0x000
30 ventry \target + 0x080
31 ventry \target + 0x100
32 ventry \target + 0x180
33
34 ventry \target + 0x200
35 ventry \target + 0x280
36 ventry \target + 0x300
37 ventry \target + 0x380
38
39 ventry \target + 0x400
40 ventry \target + 0x480
41 ventry \target + 0x500
42 ventry \target + 0x580
43
44 ventry \target + 0x600
45 ventry \target + 0x680
46 ventry \target + 0x700
47 ventry \target + 0x780
48.endm
49
50 .align 11
51ENTRY(__bp_harden_hyp_vecs_start)
52 .rept 4
53 vectors __kvm_hyp_vector
54 .endr
55ENTRY(__bp_harden_hyp_vecs_end)
56ENTRY(__psci_hyp_bp_inval_start)
57 sub sp, sp, #(8 * 18)
58 stp x16, x17, [sp, #(16 * 0)]
59 stp x14, x15, [sp, #(16 * 1)]
60 stp x12, x13, [sp, #(16 * 2)]
61 stp x10, x11, [sp, #(16 * 3)]
62 stp x8, x9, [sp, #(16 * 4)]
63 stp x6, x7, [sp, #(16 * 5)]
64 stp x4, x5, [sp, #(16 * 6)]
65 stp x2, x3, [sp, #(16 * 7)]
66 stp x0, x1, [sp, #(16 * 8)]
67 mov x0, #0x84000000
68 smc #0
69 ldp x16, x17, [sp, #(16 * 0)]
70 ldp x14, x15, [sp, #(16 * 1)]
71 ldp x12, x13, [sp, #(16 * 2)]
72 ldp x10, x11, [sp, #(16 * 3)]
73 ldp x8, x9, [sp, #(16 * 4)]
74 ldp x6, x7, [sp, #(16 * 5)]
75 ldp x4, x5, [sp, #(16 * 6)]
76 ldp x2, x3, [sp, #(16 * 7)]
77 ldp x0, x1, [sp, #(16 * 8)]
78 add sp, sp, #(8 * 18)
79ENTRY(__psci_hyp_bp_inval_end)
80
81ENTRY(__qcom_hyp_sanitize_link_stack_start)
82 stp x29, x30, [sp, #-16]!
83 .rept 16
84 bl . + 4
85 .endr
86 ldp x29, x30, [sp], #16
87ENTRY(__qcom_hyp_sanitize_link_stack_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 0e27f86ee709..ed6881882231 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -30,6 +30,20 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
30 entry->midr_range_max); 30 entry->midr_range_max);
31} 31}
32 32
33static bool __maybe_unused
34is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
35{
36 u32 model;
37
38 WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
39
40 model = read_cpuid_id();
41 model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) |
42 MIDR_ARCHITECTURE_MASK;
43
44 return model == entry->midr_model;
45}
46
33static bool 47static bool
34has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, 48has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
35 int scope) 49 int scope)
@@ -46,6 +60,127 @@ static int cpu_enable_trap_ctr_access(void *__unused)
46 return 0; 60 return 0;
47} 61}
48 62
63#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
64#include <asm/mmu_context.h>
65#include <asm/cacheflush.h>
66
67DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
68
69#ifdef CONFIG_KVM
70extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
71extern char __qcom_hyp_sanitize_link_stack_start[];
72extern char __qcom_hyp_sanitize_link_stack_end[];
73
74static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
75 const char *hyp_vecs_end)
76{
77 void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
78 int i;
79
80 for (i = 0; i < SZ_2K; i += 0x80)
81 memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
82
83 flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
84}
85
86static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
87 const char *hyp_vecs_start,
88 const char *hyp_vecs_end)
89{
90 static int last_slot = -1;
91 static DEFINE_SPINLOCK(bp_lock);
92 int cpu, slot = -1;
93
94 spin_lock(&bp_lock);
95 for_each_possible_cpu(cpu) {
96 if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
97 slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
98 break;
99 }
100 }
101
102 if (slot == -1) {
103 last_slot++;
104 BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
105 / SZ_2K) <= last_slot);
106 slot = last_slot;
107 __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
108 }
109
110 __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
111 __this_cpu_write(bp_hardening_data.fn, fn);
112 spin_unlock(&bp_lock);
113}
114#else
115#define __psci_hyp_bp_inval_start NULL
116#define __psci_hyp_bp_inval_end NULL
117#define __qcom_hyp_sanitize_link_stack_start NULL
118#define __qcom_hyp_sanitize_link_stack_end NULL
119
120static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
121 const char *hyp_vecs_start,
122 const char *hyp_vecs_end)
123{
124 __this_cpu_write(bp_hardening_data.fn, fn);
125}
126#endif /* CONFIG_KVM */
127
128static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
129 bp_hardening_cb_t fn,
130 const char *hyp_vecs_start,
131 const char *hyp_vecs_end)
132{
133 u64 pfr0;
134
135 if (!entry->matches(entry, SCOPE_LOCAL_CPU))
136 return;
137
138 pfr0 = read_cpuid(ID_AA64PFR0_EL1);
139 if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
140 return;
141
142 __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
143}
144
145#include <linux/psci.h>
146
147static int enable_psci_bp_hardening(void *data)
148{
149 const struct arm64_cpu_capabilities *entry = data;
150
151 if (psci_ops.get_version)
152 install_bp_hardening_cb(entry,
153 (bp_hardening_cb_t)psci_ops.get_version,
154 __psci_hyp_bp_inval_start,
155 __psci_hyp_bp_inval_end);
156
157 return 0;
158}
159
160static void qcom_link_stack_sanitization(void)
161{
162 u64 tmp;
163
164 asm volatile("mov %0, x30 \n"
165 ".rept 16 \n"
166 "bl . + 4 \n"
167 ".endr \n"
168 "mov x30, %0 \n"
169 : "=&r" (tmp));
170}
171
172static int qcom_enable_link_stack_sanitization(void *data)
173{
174 const struct arm64_cpu_capabilities *entry = data;
175
176 install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
177 __qcom_hyp_sanitize_link_stack_start,
178 __qcom_hyp_sanitize_link_stack_end);
179
180 return 0;
181}
182#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
183
49#define MIDR_RANGE(model, min, max) \ 184#define MIDR_RANGE(model, min, max) \
50 .def_scope = SCOPE_LOCAL_CPU, \ 185 .def_scope = SCOPE_LOCAL_CPU, \
51 .matches = is_affected_midr_range, \ 186 .matches = is_affected_midr_range, \
@@ -169,6 +304,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
169 MIDR_CPU_VAR_REV(0, 0), 304 MIDR_CPU_VAR_REV(0, 0),
170 MIDR_CPU_VAR_REV(0, 0)), 305 MIDR_CPU_VAR_REV(0, 0)),
171 }, 306 },
307 {
308 .desc = "Qualcomm Technologies Kryo erratum 1003",
309 .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
310 .def_scope = SCOPE_LOCAL_CPU,
311 .midr_model = MIDR_QCOM_KRYO,
312 .matches = is_kryo_midr,
313 },
172#endif 314#endif
173#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 315#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
174 { 316 {
@@ -187,6 +329,47 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
187 MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), 329 MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
188 }, 330 },
189#endif 331#endif
332#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
333 {
334 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
335 MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
336 .enable = enable_psci_bp_hardening,
337 },
338 {
339 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
340 MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
341 .enable = enable_psci_bp_hardening,
342 },
343 {
344 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
345 MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
346 .enable = enable_psci_bp_hardening,
347 },
348 {
349 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
350 MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
351 .enable = enable_psci_bp_hardening,
352 },
353 {
354 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
355 MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
356 .enable = qcom_enable_link_stack_sanitization,
357 },
358 {
359 .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
360 MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
361 },
362 {
363 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
364 MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
365 .enable = enable_psci_bp_hardening,
366 },
367 {
368 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
369 MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
370 .enable = enable_psci_bp_hardening,
371 },
372#endif
190 { 373 {
191 } 374 }
192}; 375};
@@ -200,15 +383,18 @@ void verify_local_cpu_errata_workarounds(void)
200{ 383{
201 const struct arm64_cpu_capabilities *caps = arm64_errata; 384 const struct arm64_cpu_capabilities *caps = arm64_errata;
202 385
203 for (; caps->matches; caps++) 386 for (; caps->matches; caps++) {
204 if (!cpus_have_cap(caps->capability) && 387 if (cpus_have_cap(caps->capability)) {
205 caps->matches(caps, SCOPE_LOCAL_CPU)) { 388 if (caps->enable)
389 caps->enable((void *)caps);
390 } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) {
206 pr_crit("CPU%d: Requires work around for %s, not detected" 391 pr_crit("CPU%d: Requires work around for %s, not detected"
207 " at boot time\n", 392 " at boot time\n",
208 smp_processor_id(), 393 smp_processor_id(),
209 caps->desc ? : "an erratum"); 394 caps->desc ? : "an erratum");
210 cpu_die_early(); 395 cpu_die_early();
211 } 396 }
397 }
212} 398}
213 399
214void update_cpu_errata_workarounds(void) 400void update_cpu_errata_workarounds(void)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a73a5928f09b..0fb6a3151443 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
123 * sync with the documentation of the CPU feature register ABI. 123 * sync with the documentation of the CPU feature register ABI.
124 */ 124 */
125static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { 125static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
126 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
126 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), 127 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
127 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), 128 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
128 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), 129 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
@@ -145,8 +146,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
145}; 146};
146 147
147static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { 148static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
149 ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
150 ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
148 ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), 151 ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
149 FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), 152 FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
153 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
150 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), 154 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
151 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), 155 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
152 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), 156 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -846,6 +850,67 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
846 ID_AA64PFR0_FP_SHIFT) < 0; 850 ID_AA64PFR0_FP_SHIFT) < 0;
847} 851}
848 852
853#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
854static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
855
856static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
857 int __unused)
858{
859 u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
860
861 /* Forced on command line? */
862 if (__kpti_forced) {
863 pr_info_once("kernel page table isolation forced %s by command line option\n",
864 __kpti_forced > 0 ? "ON" : "OFF");
865 return __kpti_forced > 0;
866 }
867
868 /* Useful for KASLR robustness */
869 if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
870 return true;
871
872 /* Don't force KPTI for CPUs that are not vulnerable */
873 switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) {
874 case MIDR_CAVIUM_THUNDERX2:
875 case MIDR_BRCM_VULCAN:
876 return false;
877 }
878
879 /* Defer to CPU feature registers */
880 return !cpuid_feature_extract_unsigned_field(pfr0,
881 ID_AA64PFR0_CSV3_SHIFT);
882}
883
884static int __init parse_kpti(char *str)
885{
886 bool enabled;
887 int ret = strtobool(str, &enabled);
888
889 if (ret)
890 return ret;
891
892 __kpti_forced = enabled ? 1 : -1;
893 return 0;
894}
895__setup("kpti=", parse_kpti);
896#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
897
898static int cpu_copy_el2regs(void *__unused)
899{
900 /*
901 * Copy register values that aren't redirected by hardware.
902 *
903 * Before code patching, we only set tpidr_el1, all CPUs need to copy
904 * this value to tpidr_el2 before we patch the code. Once we've done
905 * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
906 * do anything here.
907 */
908 if (!alternatives_applied)
909 write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
910
911 return 0;
912}
913
849static const struct arm64_cpu_capabilities arm64_features[] = { 914static const struct arm64_cpu_capabilities arm64_features[] = {
850 { 915 {
851 .desc = "GIC system register CPU interface", 916 .desc = "GIC system register CPU interface",
@@ -915,6 +980,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
915 .capability = ARM64_HAS_VIRT_HOST_EXTN, 980 .capability = ARM64_HAS_VIRT_HOST_EXTN,
916 .def_scope = SCOPE_SYSTEM, 981 .def_scope = SCOPE_SYSTEM,
917 .matches = runs_at_el2, 982 .matches = runs_at_el2,
983 .enable = cpu_copy_el2regs,
918 }, 984 },
919 { 985 {
920 .desc = "32-bit EL0 Support", 986 .desc = "32-bit EL0 Support",
@@ -932,6 +998,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
932 .def_scope = SCOPE_SYSTEM, 998 .def_scope = SCOPE_SYSTEM,
933 .matches = hyp_offset_low, 999 .matches = hyp_offset_low,
934 }, 1000 },
1001#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
1002 {
1003 .desc = "Kernel page table isolation (KPTI)",
1004 .capability = ARM64_UNMAP_KERNEL_AT_EL0,
1005 .def_scope = SCOPE_SYSTEM,
1006 .matches = unmap_kernel_at_el0,
1007 },
1008#endif
935 { 1009 {
936 /* FP/SIMD is not implemented */ 1010 /* FP/SIMD is not implemented */
937 .capability = ARM64_HAS_NO_FPSIMD, 1011 .capability = ARM64_HAS_NO_FPSIMD,
@@ -963,6 +1037,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
963 .enable = sve_kernel_enable, 1037 .enable = sve_kernel_enable,
964 }, 1038 },
965#endif /* CONFIG_ARM64_SVE */ 1039#endif /* CONFIG_ARM64_SVE */
1040#ifdef CONFIG_ARM64_RAS_EXTN
1041 {
1042 .desc = "RAS Extension Support",
1043 .capability = ARM64_HAS_RAS_EXTN,
1044 .def_scope = SCOPE_SYSTEM,
1045 .matches = has_cpuid_feature,
1046 .sys_reg = SYS_ID_AA64PFR0_EL1,
1047 .sign = FTR_UNSIGNED,
1048 .field_pos = ID_AA64PFR0_RAS_SHIFT,
1049 .min_field_value = ID_AA64PFR0_RAS_V1,
1050 .enable = cpu_clear_disr,
1051 },
1052#endif /* CONFIG_ARM64_RAS_EXTN */
966 {}, 1053 {},
967}; 1054};
968 1055
@@ -992,6 +1079,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
992 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), 1079 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
993 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), 1080 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
994 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), 1081 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
1082 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
995 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), 1083 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
996 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), 1084 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
997 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), 1085 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
@@ -1071,6 +1159,25 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
1071 cap_set_elf_hwcap(hwcaps); 1159 cap_set_elf_hwcap(hwcaps);
1072} 1160}
1073 1161
1162/*
1163 * Check if the current CPU has a given feature capability.
1164 * Should be called from non-preemptible context.
1165 */
1166static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
1167 unsigned int cap)
1168{
1169 const struct arm64_cpu_capabilities *caps;
1170
1171 if (WARN_ON(preemptible()))
1172 return false;
1173
1174 for (caps = cap_array; caps->matches; caps++)
1175 if (caps->capability == cap &&
1176 caps->matches(caps, SCOPE_LOCAL_CPU))
1177 return true;
1178 return false;
1179}
1180
1074void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, 1181void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
1075 const char *info) 1182 const char *info)
1076{ 1183{
@@ -1106,7 +1213,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
1106 * uses an IPI, giving us a PSTATE that disappears when 1213 * uses an IPI, giving us a PSTATE that disappears when
1107 * we return. 1214 * we return.
1108 */ 1215 */
1109 stop_machine(caps->enable, NULL, cpu_online_mask); 1216 stop_machine(caps->enable, (void *)caps, cpu_online_mask);
1110 } 1217 }
1111 } 1218 }
1112} 1219}
@@ -1134,8 +1241,9 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
1134} 1241}
1135 1242
1136static void 1243static void
1137verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) 1244verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list)
1138{ 1245{
1246 const struct arm64_cpu_capabilities *caps = caps_list;
1139 for (; caps->matches; caps++) { 1247 for (; caps->matches; caps++) {
1140 if (!cpus_have_cap(caps->capability)) 1248 if (!cpus_have_cap(caps->capability))
1141 continue; 1249 continue;
@@ -1143,13 +1251,13 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
1143 * If the new CPU misses an advertised feature, we cannot proceed 1251 * If the new CPU misses an advertised feature, we cannot proceed
1144 * further, park the cpu. 1252 * further, park the cpu.
1145 */ 1253 */
1146 if (!caps->matches(caps, SCOPE_LOCAL_CPU)) { 1254 if (!__this_cpu_has_cap(caps_list, caps->capability)) {
1147 pr_crit("CPU%d: missing feature: %s\n", 1255 pr_crit("CPU%d: missing feature: %s\n",
1148 smp_processor_id(), caps->desc); 1256 smp_processor_id(), caps->desc);
1149 cpu_die_early(); 1257 cpu_die_early();
1150 } 1258 }
1151 if (caps->enable) 1259 if (caps->enable)
1152 caps->enable(NULL); 1260 caps->enable((void *)caps);
1153 } 1261 }
1154} 1262}
1155 1263
@@ -1189,6 +1297,9 @@ static void verify_local_cpu_capabilities(void)
1189 1297
1190 if (system_supports_sve()) 1298 if (system_supports_sve())
1191 verify_sve_features(); 1299 verify_sve_features();
1300
1301 if (system_uses_ttbr0_pan())
1302 pr_info("Emulating Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
1192} 1303}
1193 1304
1194void check_local_cpu_capabilities(void) 1305void check_local_cpu_capabilities(void)
@@ -1225,25 +1336,6 @@ static void __init mark_const_caps_ready(void)
1225 static_branch_enable(&arm64_const_caps_ready); 1336 static_branch_enable(&arm64_const_caps_ready);
1226} 1337}
1227 1338
1228/*
1229 * Check if the current CPU has a given feature capability.
1230 * Should be called from non-preemptible context.
1231 */
1232static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
1233 unsigned int cap)
1234{
1235 const struct arm64_cpu_capabilities *caps;
1236
1237 if (WARN_ON(preemptible()))
1238 return false;
1239
1240 for (caps = cap_array; caps->desc; caps++)
1241 if (caps->capability == cap && caps->matches)
1242 return caps->matches(caps, SCOPE_LOCAL_CPU);
1243
1244 return false;
1245}
1246
1247extern const struct arm64_cpu_capabilities arm64_errata[]; 1339extern const struct arm64_cpu_capabilities arm64_errata[];
1248 1340
1249bool this_cpu_has_cap(unsigned int cap) 1341bool this_cpu_has_cap(unsigned int cap)
@@ -1387,3 +1479,11 @@ static int __init enable_mrs_emulation(void)
1387} 1479}
1388 1480
1389core_initcall(enable_mrs_emulation); 1481core_initcall(enable_mrs_emulation);
1482
1483int cpu_clear_disr(void *__unused)
1484{
1485 /* Firmware may have left a deferred SError in this register. */
1486 write_sysreg_s(0, SYS_DISR_EL1);
1487
1488 return 0;
1489}
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index fd691087dc9a..f2d13810daa8 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -47,6 +47,8 @@ int arm_cpuidle_suspend(int index)
47 47
48#include <acpi/processor.h> 48#include <acpi/processor.h>
49 49
50#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
51
50int acpi_processor_ffh_lpi_probe(unsigned int cpu) 52int acpi_processor_ffh_lpi_probe(unsigned int cpu)
51{ 53{
52 return arm_cpuidle_init(cpu); 54 return arm_cpuidle_init(cpu);
@@ -54,6 +56,10 @@ int acpi_processor_ffh_lpi_probe(unsigned int cpu)
54 56
55int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) 57int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
56{ 58{
57 return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); 59 if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
60 return CPU_PM_CPU_IDLE_ENTER_RETENTION(arm_cpuidle_suspend,
61 lpi->index);
62 else
63 return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index);
58} 64}
59#endif 65#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 1e2554543506..7f94623df8a5 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -76,6 +76,7 @@ static const char *const hwcap_str[] = {
76 "asimddp", 76 "asimddp",
77 "sha512", 77 "sha512",
78 "sve", 78 "sve",
79 "asimdfhm",
79 NULL 80 NULL
80}; 81};
81 82
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6d14b8f29b5f..b34e717d7597 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -28,6 +28,8 @@
28#include <asm/errno.h> 28#include <asm/errno.h>
29#include <asm/esr.h> 29#include <asm/esr.h>
30#include <asm/irq.h> 30#include <asm/irq.h>
31#include <asm/memory.h>
32#include <asm/mmu.h>
31#include <asm/processor.h> 33#include <asm/processor.h>
32#include <asm/ptrace.h> 34#include <asm/ptrace.h>
33#include <asm/thread_info.h> 35#include <asm/thread_info.h>
@@ -69,8 +71,21 @@
69#define BAD_FIQ 2 71#define BAD_FIQ 2
70#define BAD_ERROR 3 72#define BAD_ERROR 3
71 73
72 .macro kernel_ventry label 74 .macro kernel_ventry, el, label, regsize = 64
73 .align 7 75 .align 7
76#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
77alternative_if ARM64_UNMAP_KERNEL_AT_EL0
78 .if \el == 0
79 .if \regsize == 64
80 mrs x30, tpidrro_el0
81 msr tpidrro_el0, xzr
82 .else
83 mov x30, xzr
84 .endif
85 .endif
86alternative_else_nop_endif
87#endif
88
74 sub sp, sp, #S_FRAME_SIZE 89 sub sp, sp, #S_FRAME_SIZE
75#ifdef CONFIG_VMAP_STACK 90#ifdef CONFIG_VMAP_STACK
76 /* 91 /*
@@ -82,7 +97,7 @@
82 tbnz x0, #THREAD_SHIFT, 0f 97 tbnz x0, #THREAD_SHIFT, 0f
83 sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 98 sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
84 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp 99 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
85 b \label 100 b el\()\el\()_\label
86 101
870: 1020:
88 /* 103 /*
@@ -114,7 +129,12 @@
114 sub sp, sp, x0 129 sub sp, sp, x0
115 mrs x0, tpidrro_el0 130 mrs x0, tpidrro_el0
116#endif 131#endif
117 b \label 132 b el\()\el\()_\label
133 .endm
134
135 .macro tramp_alias, dst, sym
136 mov_q \dst, TRAMP_VALIAS
137 add \dst, \dst, #(\sym - .entry.tramp.text)
118 .endm 138 .endm
119 139
120 .macro kernel_entry, el, regsize = 64 140 .macro kernel_entry, el, regsize = 64
@@ -185,7 +205,7 @@ alternative_else_nop_endif
185 205
186 .if \el != 0 206 .if \el != 0
187 mrs x21, ttbr0_el1 207 mrs x21, ttbr0_el1
188 tst x21, #0xffff << 48 // Check for the reserved ASID 208 tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
189 orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR 209 orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
190 b.eq 1f // TTBR0 access already disabled 210 b.eq 1f // TTBR0 access already disabled
191 and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR 211 and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
@@ -248,7 +268,7 @@ alternative_else_nop_endif
248 tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set 268 tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
249 .endif 269 .endif
250 270
251 __uaccess_ttbr0_enable x0 271 __uaccess_ttbr0_enable x0, x1
252 272
253 .if \el == 0 273 .if \el == 0
254 /* 274 /*
@@ -257,7 +277,7 @@ alternative_else_nop_endif
257 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache 277 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
258 * corruption). 278 * corruption).
259 */ 279 */
260 post_ttbr0_update_workaround 280 bl post_ttbr_update_workaround
261 .endif 281 .endif
2621: 2821:
263 .if \el != 0 283 .if \el != 0
@@ -269,18 +289,20 @@ alternative_else_nop_endif
269 .if \el == 0 289 .if \el == 0
270 ldr x23, [sp, #S_SP] // load return stack pointer 290 ldr x23, [sp, #S_SP] // load return stack pointer
271 msr sp_el0, x23 291 msr sp_el0, x23
292 tst x22, #PSR_MODE32_BIT // native task?
293 b.eq 3f
294
272#ifdef CONFIG_ARM64_ERRATUM_845719 295#ifdef CONFIG_ARM64_ERRATUM_845719
273alternative_if ARM64_WORKAROUND_845719 296alternative_if ARM64_WORKAROUND_845719
274 tbz x22, #4, 1f
275#ifdef CONFIG_PID_IN_CONTEXTIDR 297#ifdef CONFIG_PID_IN_CONTEXTIDR
276 mrs x29, contextidr_el1 298 mrs x29, contextidr_el1
277 msr contextidr_el1, x29 299 msr contextidr_el1, x29
278#else 300#else
279 msr contextidr_el1, xzr 301 msr contextidr_el1, xzr
280#endif 302#endif
2811:
282alternative_else_nop_endif 303alternative_else_nop_endif
283#endif 304#endif
3053:
284 .endif 306 .endif
285 307
286 msr elr_el1, x21 // set up the return data 308 msr elr_el1, x21 // set up the return data
@@ -302,7 +324,21 @@ alternative_else_nop_endif
302 ldp x28, x29, [sp, #16 * 14] 324 ldp x28, x29, [sp, #16 * 14]
303 ldr lr, [sp, #S_LR] 325 ldr lr, [sp, #S_LR]
304 add sp, sp, #S_FRAME_SIZE // restore sp 326 add sp, sp, #S_FRAME_SIZE // restore sp
305 eret // return to kernel 327
328 .if \el == 0
329alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
330#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
331 bne 4f
332 msr far_el1, x30
333 tramp_alias x30, tramp_exit_native
334 br x30
3354:
336 tramp_alias x30, tramp_exit_compat
337 br x30
338#endif
339 .else
340 eret
341 .endif
306 .endm 342 .endm
307 343
308 .macro irq_stack_entry 344 .macro irq_stack_entry
@@ -367,31 +403,31 @@ tsk .req x28 // current thread_info
367 403
368 .align 11 404 .align 11
369ENTRY(vectors) 405ENTRY(vectors)
370 kernel_ventry el1_sync_invalid // Synchronous EL1t 406 kernel_ventry 1, sync_invalid // Synchronous EL1t
371 kernel_ventry el1_irq_invalid // IRQ EL1t 407 kernel_ventry 1, irq_invalid // IRQ EL1t
372 kernel_ventry el1_fiq_invalid // FIQ EL1t 408 kernel_ventry 1, fiq_invalid // FIQ EL1t
373 kernel_ventry el1_error_invalid // Error EL1t 409 kernel_ventry 1, error_invalid // Error EL1t
374 410
375 kernel_ventry el1_sync // Synchronous EL1h 411 kernel_ventry 1, sync // Synchronous EL1h
376 kernel_ventry el1_irq // IRQ EL1h 412 kernel_ventry 1, irq // IRQ EL1h
377 kernel_ventry el1_fiq_invalid // FIQ EL1h 413 kernel_ventry 1, fiq_invalid // FIQ EL1h
378 kernel_ventry el1_error // Error EL1h 414 kernel_ventry 1, error // Error EL1h
379 415
380 kernel_ventry el0_sync // Synchronous 64-bit EL0 416 kernel_ventry 0, sync // Synchronous 64-bit EL0
381 kernel_ventry el0_irq // IRQ 64-bit EL0 417 kernel_ventry 0, irq // IRQ 64-bit EL0
382 kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0 418 kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
383 kernel_ventry el0_error // Error 64-bit EL0 419 kernel_ventry 0, error // Error 64-bit EL0
384 420
385#ifdef CONFIG_COMPAT 421#ifdef CONFIG_COMPAT
386 kernel_ventry el0_sync_compat // Synchronous 32-bit EL0 422 kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
387 kernel_ventry el0_irq_compat // IRQ 32-bit EL0 423 kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
388 kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 424 kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
389 kernel_ventry el0_error_compat // Error 32-bit EL0 425 kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
390#else 426#else
391 kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0 427 kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
392 kernel_ventry el0_irq_invalid // IRQ 32-bit EL0 428 kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
393 kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0 429 kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
394 kernel_ventry el0_error_invalid // Error 32-bit EL0 430 kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
395#endif 431#endif
396END(vectors) 432END(vectors)
397 433
@@ -685,12 +721,15 @@ el0_ia:
685 * Instruction abort handling 721 * Instruction abort handling
686 */ 722 */
687 mrs x26, far_el1 723 mrs x26, far_el1
688 enable_daif 724 enable_da_f
725#ifdef CONFIG_TRACE_IRQFLAGS
726 bl trace_hardirqs_off
727#endif
689 ct_user_exit 728 ct_user_exit
690 mov x0, x26 729 mov x0, x26
691 mov x1, x25 730 mov x1, x25
692 mov x2, sp 731 mov x2, sp
693 bl do_mem_abort 732 bl do_el0_ia_bp_hardening
694 b ret_to_user 733 b ret_to_user
695el0_fpsimd_acc: 734el0_fpsimd_acc:
696 /* 735 /*
@@ -943,6 +982,124 @@ __ni_sys_trace:
943 982
944 .popsection // .entry.text 983 .popsection // .entry.text
945 984
985#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
986/*
987 * Exception vectors trampoline.
988 */
989 .pushsection ".entry.tramp.text", "ax"
990
991 .macro tramp_map_kernel, tmp
992 mrs \tmp, ttbr1_el1
993 add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
994 bic \tmp, \tmp, #USER_ASID_FLAG
995 msr ttbr1_el1, \tmp
996#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
997alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
998 /* ASID already in \tmp[63:48] */
999 movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
1000 movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
1001 /* 2MB boundary containing the vectors, so we nobble the walk cache */
1002 movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
1003 isb
1004 tlbi vae1, \tmp
1005 dsb nsh
1006alternative_else_nop_endif
1007#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
1008 .endm
1009
1010 .macro tramp_unmap_kernel, tmp
1011 mrs \tmp, ttbr1_el1
1012 sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
1013 orr \tmp, \tmp, #USER_ASID_FLAG
1014 msr ttbr1_el1, \tmp
1015 /*
1016 * We avoid running the post_ttbr_update_workaround here because the
1017 * user and kernel ASIDs don't have conflicting mappings, so any
1018 * "blessing" as described in:
1019 *
1020 * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com
1021 *
1022 * will not hurt correctness. Whilst this may partially defeat the
1023 * point of using split ASIDs in the first place, it avoids
1024 * the hit of invalidating the entire I-cache on every return to
1025 * userspace.
1026 */
1027 .endm
1028
1029 .macro tramp_ventry, regsize = 64
1030 .align 7
10311:
1032 .if \regsize == 64
1033 msr tpidrro_el0, x30 // Restored in kernel_ventry
1034 .endif
1035 /*
1036 * Defend against branch aliasing attacks by pushing a dummy
1037 * entry onto the return stack and using a RET instruction to
1038 * enter the full-fat kernel vectors.
1039 */
1040 bl 2f
1041 b .
10422:
1043 tramp_map_kernel x30
1044#ifdef CONFIG_RANDOMIZE_BASE
1045 adr x30, tramp_vectors + PAGE_SIZE
1046alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
1047 ldr x30, [x30]
1048#else
1049 ldr x30, =vectors
1050#endif
1051 prfm plil1strm, [x30, #(1b - tramp_vectors)]
1052 msr vbar_el1, x30
1053 add x30, x30, #(1b - tramp_vectors)
1054 isb
1055 ret
1056 .endm
1057
1058 .macro tramp_exit, regsize = 64
1059 adr x30, tramp_vectors
1060 msr vbar_el1, x30
1061 tramp_unmap_kernel x30
1062 .if \regsize == 64
1063 mrs x30, far_el1
1064 .endif
1065 eret
1066 .endm
1067
1068 .align 11
1069ENTRY(tramp_vectors)
1070 .space 0x400
1071
1072 tramp_ventry
1073 tramp_ventry
1074 tramp_ventry
1075 tramp_ventry
1076
1077 tramp_ventry 32
1078 tramp_ventry 32
1079 tramp_ventry 32
1080 tramp_ventry 32
1081END(tramp_vectors)
1082
1083ENTRY(tramp_exit_native)
1084 tramp_exit
1085END(tramp_exit_native)
1086
1087ENTRY(tramp_exit_compat)
1088 tramp_exit 32
1089END(tramp_exit_compat)
1090
1091 .ltorg
1092 .popsection // .entry.tramp.text
1093#ifdef CONFIG_RANDOMIZE_BASE
1094 .pushsection ".rodata", "a"
1095 .align PAGE_SHIFT
1096 .globl __entry_tramp_data_start
1097__entry_tramp_data_start:
1098 .quad vectors
1099 .popsection // .rodata
1100#endif /* CONFIG_RANDOMIZE_BASE */
1101#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
1102
946/* 1103/*
947 * Special system call wrappers. 1104 * Special system call wrappers.
948 */ 1105 */
@@ -996,3 +1153,180 @@ ENTRY(ret_from_fork)
996 b ret_to_user 1153 b ret_to_user
997ENDPROC(ret_from_fork) 1154ENDPROC(ret_from_fork)
998NOKPROBE(ret_from_fork) 1155NOKPROBE(ret_from_fork)
1156
1157#ifdef CONFIG_ARM_SDE_INTERFACE
1158
1159#include <asm/sdei.h>
1160#include <uapi/linux/arm_sdei.h>
1161
1162.macro sdei_handler_exit exit_mode
1163 /* On success, this call never returns... */
1164 cmp \exit_mode, #SDEI_EXIT_SMC
1165 b.ne 99f
1166 smc #0
1167 b .
116899: hvc #0
1169 b .
1170.endm
1171
1172#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
1173/*
1174 * The regular SDEI entry point may have been unmapped along with the rest of
1175 * the kernel. This trampoline restores the kernel mapping to make the x1 memory
1176 * argument accessible.
1177 *
1178 * This clobbers x4, __sdei_handler() will restore this from firmware's
1179 * copy.
1180 */
1181.ltorg
1182.pushsection ".entry.tramp.text", "ax"
1183ENTRY(__sdei_asm_entry_trampoline)
1184 mrs x4, ttbr1_el1
1185 tbz x4, #USER_ASID_BIT, 1f
1186
1187 tramp_map_kernel tmp=x4
1188 isb
1189 mov x4, xzr
1190
1191 /*
1192 * Use reg->interrupted_regs.addr_limit to remember whether to unmap
1193 * the kernel on exit.
1194 */
11951: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
1196
1197#ifdef CONFIG_RANDOMIZE_BASE
1198 adr x4, tramp_vectors + PAGE_SIZE
1199 add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
1200 ldr x4, [x4]
1201#else
1202 ldr x4, =__sdei_asm_handler
1203#endif
1204 br x4
1205ENDPROC(__sdei_asm_entry_trampoline)
1206NOKPROBE(__sdei_asm_entry_trampoline)
1207
1208/*
1209 * Make the exit call and restore the original ttbr1_el1
1210 *
1211 * x0 & x1: setup for the exit API call
1212 * x2: exit_mode
1213 * x4: struct sdei_registered_event argument from registration time.
1214 */
1215ENTRY(__sdei_asm_exit_trampoline)
1216 ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
1217 cbnz x4, 1f
1218
1219 tramp_unmap_kernel tmp=x4
1220
12211: sdei_handler_exit exit_mode=x2
1222ENDPROC(__sdei_asm_exit_trampoline)
1223NOKPROBE(__sdei_asm_exit_trampoline)
1224 .ltorg
1225.popsection // .entry.tramp.text
1226#ifdef CONFIG_RANDOMIZE_BASE
1227.pushsection ".rodata", "a"
1228__sdei_asm_trampoline_next_handler:
1229 .quad __sdei_asm_handler
1230.popsection // .rodata
1231#endif /* CONFIG_RANDOMIZE_BASE */
1232#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
1233
1234/*
1235 * Software Delegated Exception entry point.
1236 *
1237 * x0: Event number
1238 * x1: struct sdei_registered_event argument from registration time.
1239 * x2: interrupted PC
1240 * x3: interrupted PSTATE
1241 * x4: maybe clobbered by the trampoline
1242 *
1243 * Firmware has preserved x0->x17 for us, we must save/restore the rest to
1244 * follow SMC-CC. We save (or retrieve) all the registers as the handler may
1245 * want them.
1246 */
1247ENTRY(__sdei_asm_handler)
1248 stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]
1249 stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
1250 stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
1251 stp x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4]
1252 stp x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5]
1253 stp x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6]
1254 stp x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7]
1255 stp x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8]
1256 stp x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9]
1257 stp x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10]
1258 stp x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11]
1259 stp x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12]
1260 stp x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13]
1261 stp x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14]
1262 mov x4, sp
1263 stp lr, x4, [x1, #SDEI_EVENT_INTREGS + S_LR]
1264
1265 mov x19, x1
1266
1267#ifdef CONFIG_VMAP_STACK
1268 /*
1269 * entry.S may have been using sp as a scratch register, find whether
1270 * this is a normal or critical event and switch to the appropriate
1271 * stack for this CPU.
1272 */
1273 ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
1274 cbnz w4, 1f
1275 ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
1276 b 2f
12771: ldr_this_cpu dst=x5, sym=sdei_stack_critical_ptr, tmp=x6
12782: mov x6, #SDEI_STACK_SIZE
1279 add x5, x5, x6
1280 mov sp, x5
1281#endif
1282
1283 /*
1284 * We may have interrupted userspace, or a guest, or exit-from or
1285 * return-to either of these. We can't trust sp_el0, restore it.
1286 */
1287 mrs x28, sp_el0
1288 ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1
1289 msr sp_el0, x0
1290
1291 /* If we interrupted the kernel point to the previous stack/frame. */
1292 and x0, x3, #0xc
1293 mrs x1, CurrentEL
1294 cmp x0, x1
1295 csel x29, x29, xzr, eq // fp, or zero
1296 csel x4, x2, xzr, eq // elr, or zero
1297
1298 stp x29, x4, [sp, #-16]!
1299 mov x29, sp
1300
1301 add x0, x19, #SDEI_EVENT_INTREGS
1302 mov x1, x19
1303 bl __sdei_handler
1304
1305 msr sp_el0, x28
1306 /* restore regs >x17 that we clobbered */
1307 mov x4, x19 // keep x4 for __sdei_asm_exit_trampoline
1308 ldp x28, x29, [x4, #SDEI_EVENT_INTREGS + 16 * 14]
1309 ldp x18, x19, [x4, #SDEI_EVENT_INTREGS + 16 * 9]
1310 ldp lr, x1, [x4, #SDEI_EVENT_INTREGS + S_LR]
1311 mov sp, x1
1312
1313 mov x1, x0 // address to complete_and_resume
1314 /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */
1315 cmp x0, #1
1316 mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE
1317 mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
1318 csel x0, x2, x3, ls
1319
1320 ldr_l x2, sdei_exit_mode
1321
1322alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
1323 sdei_handler_exit exit_mode=x2
1324alternative_else_nop_endif
1325
1326#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
1327 tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
1328 br x5
1329#endif
1330ENDPROC(__sdei_asm_handler)
1331NOKPROBE(__sdei_asm_handler)
1332#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index fae81f7964b4..55fb544072f6 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1036,14 +1036,14 @@ void fpsimd_restore_current_state(void)
1036 * flag that indicates that the FPSIMD register contents are the most recent 1036 * flag that indicates that the FPSIMD register contents are the most recent
1037 * FPSIMD state of 'current' 1037 * FPSIMD state of 'current'
1038 */ 1038 */
1039void fpsimd_update_current_state(struct fpsimd_state *state) 1039void fpsimd_update_current_state(struct user_fpsimd_state const *state)
1040{ 1040{
1041 if (!system_supports_fpsimd()) 1041 if (!system_supports_fpsimd())
1042 return; 1042 return;
1043 1043
1044 local_bh_disable(); 1044 local_bh_disable();
1045 1045
1046 current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd; 1046 current->thread.fpsimd_state.user_fpsimd = *state;
1047 if (system_supports_sve() && test_thread_flag(TIF_SVE)) 1047 if (system_supports_sve() && test_thread_flag(TIF_SVE))
1048 fpsimd_to_sve(current); 1048 fpsimd_to_sve(current);
1049 1049
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e3cb9fbf96b6..ba3ab04788dc 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -148,6 +148,26 @@ preserve_boot_args:
148ENDPROC(preserve_boot_args) 148ENDPROC(preserve_boot_args)
149 149
150/* 150/*
151 * Macro to arrange a physical address in a page table entry, taking care of
152 * 52-bit addresses.
153 *
154 * Preserves: phys
155 * Returns: pte
156 */
157 .macro phys_to_pte, phys, pte
158#ifdef CONFIG_ARM64_PA_BITS_52
159 /*
160 * We assume \phys is 64K aligned and this is guaranteed by only
161 * supporting this configuration with 64K pages.
162 */
163 orr \pte, \phys, \phys, lsr #36
164 and \pte, \pte, #PTE_ADDR_MASK
165#else
166 mov \pte, \phys
167#endif
168 .endm
169
170/*
151 * Macro to create a table entry to the next page. 171 * Macro to create a table entry to the next page.
152 * 172 *
153 * tbl: page table address 173 * tbl: page table address
@@ -156,54 +176,124 @@ ENDPROC(preserve_boot_args)
156 * ptrs: #imm pointers per table page 176 * ptrs: #imm pointers per table page
157 * 177 *
158 * Preserves: virt 178 * Preserves: virt
159 * Corrupts: tmp1, tmp2 179 * Corrupts: ptrs, tmp1, tmp2
160 * Returns: tbl -> next level table page address 180 * Returns: tbl -> next level table page address
161 */ 181 */
162 .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 182 .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
163 lsr \tmp1, \virt, #\shift 183 add \tmp1, \tbl, #PAGE_SIZE
164 and \tmp1, \tmp1, #\ptrs - 1 // table index 184 phys_to_pte \tmp1, \tmp2
165 add \tmp2, \tbl, #PAGE_SIZE
166 orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type 185 orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
186 lsr \tmp1, \virt, #\shift
187 sub \ptrs, \ptrs, #1
188 and \tmp1, \tmp1, \ptrs // table index
167 str \tmp2, [\tbl, \tmp1, lsl #3] 189 str \tmp2, [\tbl, \tmp1, lsl #3]
168 add \tbl, \tbl, #PAGE_SIZE // next level table page 190 add \tbl, \tbl, #PAGE_SIZE // next level table page
169 .endm 191 .endm
170 192
171/* 193/*
172 * Macro to populate the PGD (and possibily PUD) for the corresponding 194 * Macro to populate page table entries, these entries can be pointers to the next level
173 * block entry in the next level (tbl) for the given virtual address. 195 * or last level entries pointing to physical memory.
196 *
197 * tbl: page table address
198 * rtbl: pointer to page table or physical memory
199 * index: start index to write
200 * eindex: end index to write - [index, eindex] written to
201 * flags: flags for pagetable entry to or in
202 * inc: increment to rtbl between each entry
203 * tmp1: temporary variable
174 * 204 *
175 * Preserves: tbl, next, virt 205 * Preserves: tbl, eindex, flags, inc
176 * Corrupts: tmp1, tmp2 206 * Corrupts: index, tmp1
207 * Returns: rtbl
177 */ 208 */
178 .macro create_pgd_entry, tbl, virt, tmp1, tmp2 209 .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
179 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 210.Lpe\@: phys_to_pte \rtbl, \tmp1
180#if SWAPPER_PGTABLE_LEVELS > 3 211 orr \tmp1, \tmp1, \flags // tmp1 = table entry
181 create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 212 str \tmp1, [\tbl, \index, lsl #3]
182#endif 213 add \rtbl, \rtbl, \inc // rtbl = pa next level
183#if SWAPPER_PGTABLE_LEVELS > 2 214 add \index, \index, #1
184 create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 215 cmp \index, \eindex
185#endif 216 b.ls .Lpe\@
217 .endm
218
219/*
220 * Compute indices of table entries from virtual address range. If multiple entries
221 * were needed in the previous page table level then the next page table level is assumed
222 * to be composed of multiple pages. (This effectively scales the end index).
223 *
224 * vstart: virtual address of start of range
225 * vend: virtual address of end of range
226 * shift: shift used to transform virtual address into index
227 * ptrs: number of entries in page table
228 * istart: index in table corresponding to vstart
229 * iend: index in table corresponding to vend
230 * count: On entry: how many extra entries were required in previous level, scales
231 * our end index.
232 * On exit: returns how many extra entries required for next page table level
233 *
234 * Preserves: vstart, vend, shift, ptrs
235 * Returns: istart, iend, count
236 */
237 .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
238 lsr \iend, \vend, \shift
239 mov \istart, \ptrs
240 sub \istart, \istart, #1
241 and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
242 mov \istart, \ptrs
243 mul \istart, \istart, \count
244 add \iend, \iend, \istart // iend += (count - 1) * ptrs
245 // our entries span multiple tables
246
247 lsr \istart, \vstart, \shift
248 mov \count, \ptrs
249 sub \count, \count, #1
250 and \istart, \istart, \count
251
252 sub \count, \iend, \istart
186 .endm 253 .endm
187 254
188/* 255/*
189 * Macro to populate block entries in the page table for the start..end 256 * Map memory for specified virtual address range. Each level of page table needed supports
190 * virtual range (inclusive). 257 * multiple entries. If a level requires n entries the next page table level is assumed to be
258 * formed from n pages.
259 *
260 * tbl: location of page table
261 * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
262 * vstart: start address to map
263 * vend: end address to map - we map [vstart, vend]
264 * flags: flags to use to map last level entries
265 * phys: physical address corresponding to vstart - physical memory is contiguous
266 * pgds: the number of pgd entries
191 * 267 *
192 * Preserves: tbl, flags 268 * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
193 * Corrupts: phys, start, end, pstate 269 * Preserves: vstart, vend, flags
270 * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv
194 */ 271 */
195 .macro create_block_map, tbl, flags, phys, start, end 272 .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
196 lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT 273 add \rtbl, \tbl, #PAGE_SIZE
197 lsr \start, \start, #SWAPPER_BLOCK_SHIFT 274 mov \sv, \rtbl
198 and \start, \start, #PTRS_PER_PTE - 1 // table index 275 mov \count, #0
199 orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry 276 compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
200 lsr \end, \end, #SWAPPER_BLOCK_SHIFT 277 populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
201 and \end, \end, #PTRS_PER_PTE - 1 // table end index 278 mov \tbl, \sv
2029999: str \phys, [\tbl, \start, lsl #3] // store the entry 279 mov \sv, \rtbl
203 add \start, \start, #1 // next entry 280
204 add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block 281#if SWAPPER_PGTABLE_LEVELS > 3
205 cmp \start, \end 282 compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
206 b.ls 9999b 283 populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
284 mov \tbl, \sv
285 mov \sv, \rtbl
286#endif
287
288#if SWAPPER_PGTABLE_LEVELS > 2
289 compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
290 populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
291 mov \tbl, \sv
292#endif
293
294 compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
295 bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
296 populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
207 .endm 297 .endm
208 298
209/* 299/*
@@ -221,14 +311,16 @@ __create_page_tables:
221 * dirty cache lines being evicted. 311 * dirty cache lines being evicted.
222 */ 312 */
223 adrp x0, idmap_pg_dir 313 adrp x0, idmap_pg_dir
224 ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 314 adrp x1, swapper_pg_end
315 sub x1, x1, x0
225 bl __inval_dcache_area 316 bl __inval_dcache_area
226 317
227 /* 318 /*
228 * Clear the idmap and swapper page tables. 319 * Clear the idmap and swapper page tables.
229 */ 320 */
230 adrp x0, idmap_pg_dir 321 adrp x0, idmap_pg_dir
231 ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 322 adrp x1, swapper_pg_end
323 sub x1, x1, x0
2321: stp xzr, xzr, [x0], #16 3241: stp xzr, xzr, [x0], #16
233 stp xzr, xzr, [x0], #16 325 stp xzr, xzr, [x0], #16
234 stp xzr, xzr, [x0], #16 326 stp xzr, xzr, [x0], #16
@@ -244,26 +336,13 @@ __create_page_tables:
244 adrp x0, idmap_pg_dir 336 adrp x0, idmap_pg_dir
245 adrp x3, __idmap_text_start // __pa(__idmap_text_start) 337 adrp x3, __idmap_text_start // __pa(__idmap_text_start)
246 338
247#ifndef CONFIG_ARM64_VA_BITS_48
248#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
249#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT))
250
251 /*
252 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
253 * created that covers system RAM if that is located sufficiently high
254 * in the physical address space. So for the ID map, use an extended
255 * virtual range in that case, by configuring an additional translation
256 * level.
257 * First, we have to verify our assumption that the current value of
258 * VA_BITS was chosen such that all translation levels are fully
259 * utilised, and that lowering T0SZ will always result in an additional
260 * translation level to be configured.
261 */
262#if VA_BITS != EXTRA_SHIFT
263#error "Mismatch between VA_BITS and page size/number of translation levels"
264#endif
265
266 /* 339 /*
340 * VA_BITS may be too small to allow for an ID mapping to be created
341 * that covers system RAM if that is located sufficiently high in the
342 * physical address space. So for the ID map, use an extended virtual
343 * range in that case, and configure an additional translation level
344 * if needed.
345 *
267 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the 346 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
268 * entire ID map region can be mapped. As T0SZ == (64 - #bits used), 347 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
269 * this number conveniently equals the number of leading zeroes in 348 * this number conveniently equals the number of leading zeroes in
@@ -272,21 +351,44 @@ __create_page_tables:
272 adrp x5, __idmap_text_end 351 adrp x5, __idmap_text_end
273 clz x5, x5 352 clz x5, x5
274 cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? 353 cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
275 b.ge 1f // .. then skip additional level 354 b.ge 1f // .. then skip VA range extension
276 355
277 adr_l x6, idmap_t0sz 356 adr_l x6, idmap_t0sz
278 str x5, [x6] 357 str x5, [x6]
279 dmb sy 358 dmb sy
280 dc ivac, x6 // Invalidate potentially stale cache line 359 dc ivac, x6 // Invalidate potentially stale cache line
281 360
282 create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 361#if (VA_BITS < 48)
2831: 362#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
363#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
364
365 /*
366 * If VA_BITS < 48, we have to configure an additional table level.
367 * First, we have to verify our assumption that the current value of
368 * VA_BITS was chosen such that all translation levels are fully
369 * utilised, and that lowering T0SZ will always result in an additional
370 * translation level to be configured.
371 */
372#if VA_BITS != EXTRA_SHIFT
373#error "Mismatch between VA_BITS and page size/number of translation levels"
284#endif 374#endif
285 375
286 create_pgd_entry x0, x3, x5, x6 376 mov x4, EXTRA_PTRS
377 create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
378#else
379 /*
380 * If VA_BITS == 48, we don't have to configure an additional
381 * translation level, but the top-level table has more entries.
382 */
383 mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
384 str_l x4, idmap_ptrs_per_pgd, x5
385#endif
3861:
387 ldr_l x4, idmap_ptrs_per_pgd
287 mov x5, x3 // __pa(__idmap_text_start) 388 mov x5, x3 // __pa(__idmap_text_start)
288 adr_l x6, __idmap_text_end // __pa(__idmap_text_end) 389 adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
289 create_block_map x0, x7, x3, x5, x6 390
391 map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
290 392
291 /* 393 /*
292 * Map the kernel image (starting with PHYS_OFFSET). 394 * Map the kernel image (starting with PHYS_OFFSET).
@@ -294,12 +396,13 @@ __create_page_tables:
294 adrp x0, swapper_pg_dir 396 adrp x0, swapper_pg_dir
295 mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) 397 mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
296 add x5, x5, x23 // add KASLR displacement 398 add x5, x5, x23 // add KASLR displacement
297 create_pgd_entry x0, x5, x3, x6 399 mov x4, PTRS_PER_PGD
298 adrp x6, _end // runtime __pa(_end) 400 adrp x6, _end // runtime __pa(_end)
299 adrp x3, _text // runtime __pa(_text) 401 adrp x3, _text // runtime __pa(_text)
300 sub x6, x6, x3 // _end - _text 402 sub x6, x6, x3 // _end - _text
301 add x6, x6, x5 // runtime __va(_end) 403 add x6, x6, x5 // runtime __va(_end)
302 create_block_map x0, x7, x3, x5, x6 404
405 map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
303 406
304 /* 407 /*
305 * Since the page tables have been populated with non-cacheable 408 * Since the page tables have been populated with non-cacheable
@@ -307,7 +410,8 @@ __create_page_tables:
307 * tables again to remove any speculatively loaded cache lines. 410 * tables again to remove any speculatively loaded cache lines.
308 */ 411 */
309 adrp x0, idmap_pg_dir 412 adrp x0, idmap_pg_dir
310 ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 413 adrp x1, swapper_pg_end
414 sub x1, x1, x0
311 dmb sy 415 dmb sy
312 bl __inval_dcache_area 416 bl __inval_dcache_area
313 417
@@ -388,17 +492,13 @@ ENTRY(el2_setup)
388 mrs x0, CurrentEL 492 mrs x0, CurrentEL
389 cmp x0, #CurrentEL_EL2 493 cmp x0, #CurrentEL_EL2
390 b.eq 1f 494 b.eq 1f
391 mrs x0, sctlr_el1 495 mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
392CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1
393CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
394 msr sctlr_el1, x0 496 msr sctlr_el1, x0
395 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 497 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
396 isb 498 isb
397 ret 499 ret
398 500
3991: mrs x0, sctlr_el2 5011: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
400CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2
401CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2
402 msr sctlr_el2, x0 502 msr sctlr_el2, x0
403 503
404#ifdef CONFIG_ARM64_VHE 504#ifdef CONFIG_ARM64_VHE
@@ -514,10 +614,7 @@ install_el2_stub:
514 * requires no configuration, and all non-hyp-specific EL2 setup 614 * requires no configuration, and all non-hyp-specific EL2 setup
515 * will be done via the _EL1 system register aliases in __cpu_setup. 615 * will be done via the _EL1 system register aliases in __cpu_setup.
516 */ 616 */
517 /* sctlr_el1 */ 617 mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
518 mov x0, #0x0800 // Set/clear RES{1,0} bits
519CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems
520CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
521 msr sctlr_el1, x0 618 msr sctlr_el1, x0
522 619
523 /* Coprocessor traps. */ 620 /* Coprocessor traps. */
@@ -679,8 +776,10 @@ ENTRY(__enable_mmu)
679 update_early_cpu_boot_status 0, x1, x2 776 update_early_cpu_boot_status 0, x1, x2
680 adrp x1, idmap_pg_dir 777 adrp x1, idmap_pg_dir
681 adrp x2, swapper_pg_dir 778 adrp x2, swapper_pg_dir
682 msr ttbr0_el1, x1 // load TTBR0 779 phys_to_ttbr x1, x3
683 msr ttbr1_el1, x2 // load TTBR1 780 phys_to_ttbr x2, x4
781 msr ttbr0_el1, x3 // load TTBR0
782 msr ttbr1_el1, x4 // load TTBR1
684 isb 783 isb
685 msr sctlr_el1, x0 784 msr sctlr_el1, x0
686 isb 785 isb
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index e56d848b6466..84f5d52fddda 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -33,12 +33,14 @@
33 * Even switching to our copied tables will cause a changed output address at 33 * Even switching to our copied tables will cause a changed output address at
34 * each stage of the walk. 34 * each stage of the walk.
35 */ 35 */
36.macro break_before_make_ttbr_switch zero_page, page_table 36.macro break_before_make_ttbr_switch zero_page, page_table, tmp
37 msr ttbr1_el1, \zero_page 37 phys_to_ttbr \zero_page, \tmp
38 msr ttbr1_el1, \tmp
38 isb 39 isb
39 tlbi vmalle1 40 tlbi vmalle1
40 dsb nsh 41 dsb nsh
41 msr ttbr1_el1, \page_table 42 phys_to_ttbr \page_table, \tmp
43 msr ttbr1_el1, \tmp
42 isb 44 isb
43.endm 45.endm
44 46
@@ -78,7 +80,7 @@ ENTRY(swsusp_arch_suspend_exit)
78 * We execute from ttbr0, change ttbr1 to our copied linear map tables 80 * We execute from ttbr0, change ttbr1 to our copied linear map tables
79 * with a break-before-make via the zero page 81 * with a break-before-make via the zero page
80 */ 82 */
81 break_before_make_ttbr_switch x5, x0 83 break_before_make_ttbr_switch x5, x0, x6
82 84
83 mov x21, x1 85 mov x21, x1
84 mov x30, x2 86 mov x30, x2
@@ -109,7 +111,7 @@ ENTRY(swsusp_arch_suspend_exit)
109 dsb ish /* wait for PoU cleaning to finish */ 111 dsb ish /* wait for PoU cleaning to finish */
110 112
111 /* switch to the restored kernels page tables */ 113 /* switch to the restored kernels page tables */
112 break_before_make_ttbr_switch x25, x21 114 break_before_make_ttbr_switch x25, x21, x6
113 115
114 ic ialluis 116 ic ialluis
115 dsb ish 117 dsb ish
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 3009b8b80f08..f20cf7e99249 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -247,8 +247,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
247 } 247 }
248 248
249 pte = pte_offset_kernel(pmd, dst_addr); 249 pte = pte_offset_kernel(pmd, dst_addr);
250 set_pte(pte, __pte(virt_to_phys((void *)dst) | 250 set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
251 pgprot_val(PAGE_KERNEL_EXEC)));
252 251
253 /* 252 /*
254 * Load our new page tables. A strict BBM approach requires that we 253 * Load our new page tables. A strict BBM approach requires that we
@@ -264,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
264 */ 263 */
265 cpu_set_reserved_ttbr0(); 264 cpu_set_reserved_ttbr0();
266 local_flush_tlb_all(); 265 local_flush_tlb_all();
267 write_sysreg(virt_to_phys(pgd), ttbr0_el1); 266 write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
268 isb(); 267 isb();
269 268
270 *phys_dst_addr = virt_to_phys((void *)dst); 269 *phys_dst_addr = virt_to_phys((void *)dst);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 713561e5bcab..60e5fc661f74 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -29,6 +29,7 @@
29#include <linux/irqchip.h> 29#include <linux/irqchip.h>
30#include <linux/seq_file.h> 30#include <linux/seq_file.h>
31#include <linux/vmalloc.h> 31#include <linux/vmalloc.h>
32#include <asm/vmap_stack.h>
32 33
33unsigned long irq_err_count; 34unsigned long irq_err_count;
34 35
@@ -58,17 +59,7 @@ static void init_irq_stacks(void)
58 unsigned long *p; 59 unsigned long *p;
59 60
60 for_each_possible_cpu(cpu) { 61 for_each_possible_cpu(cpu) {
61 /* 62 p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
62 * To ensure that VMAP'd stack overflow detection works
63 * correctly, the IRQ stacks need to have the same
64 * alignment as other stacks.
65 */
66 p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN,
67 VMALLOC_START, VMALLOC_END,
68 THREADINFO_GFP, PAGE_KERNEL,
69 0, cpu_to_node(cpu),
70 __builtin_return_address(0));
71
72 per_cpu(irq_stack_ptr, cpu) = p; 63 per_cpu(irq_stack_ptr, cpu) = p;
73 } 64 }
74} 65}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6b7dcf4310ac..583fd8154695 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -370,16 +370,14 @@ void tls_preserve_current_state(void)
370 370
371static void tls_thread_switch(struct task_struct *next) 371static void tls_thread_switch(struct task_struct *next)
372{ 372{
373 unsigned long tpidr, tpidrro;
374
375 tls_preserve_current_state(); 373 tls_preserve_current_state();
376 374
377 tpidr = *task_user_tls(next); 375 if (is_compat_thread(task_thread_info(next)))
378 tpidrro = is_compat_thread(task_thread_info(next)) ? 376 write_sysreg(next->thread.tp_value, tpidrro_el0);
379 next->thread.tp_value : 0; 377 else if (!arm64_kernel_unmapped_at_el0())
378 write_sysreg(0, tpidrro_el0);
380 379
381 write_sysreg(tpidr, tpidr_el0); 380 write_sysreg(*task_user_tls(next), tpidr_el0);
382 write_sysreg(tpidrro, tpidrro_el0);
383} 381}
384 382
385/* Restore the UAO state depending on next's addr_limit */ 383/* Restore the UAO state depending on next's addr_limit */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
new file mode 100644
index 000000000000..6b8d90d5ceae
--- /dev/null
+++ b/arch/arm64/kernel/sdei.c
@@ -0,0 +1,235 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2017 Arm Ltd.
3#define pr_fmt(fmt) "sdei: " fmt
4
5#include <linux/arm_sdei.h>
6#include <linux/hardirq.h>
7#include <linux/irqflags.h>
8#include <linux/sched/task_stack.h>
9#include <linux/uaccess.h>
10
11#include <asm/alternative.h>
12#include <asm/kprobes.h>
13#include <asm/mmu.h>
14#include <asm/ptrace.h>
15#include <asm/sections.h>
16#include <asm/sysreg.h>
17#include <asm/vmap_stack.h>
18
19unsigned long sdei_exit_mode;
20
21/*
22 * VMAP'd stacks checking for stack overflow on exception using sp as a scratch
23 * register, meaning SDEI has to switch to its own stack. We need two stacks as
24 * a critical event may interrupt a normal event that has just taken a
25 * synchronous exception, and is using sp as scratch register. For a critical
26 * event interrupting a normal event, we can't reliably tell if we were on the
27 * sdei stack.
28 * For now, we allocate stacks when the driver is probed.
29 */
30DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
31DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
32
33#ifdef CONFIG_VMAP_STACK
34DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
35DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
36#endif
37
38static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
39{
40 unsigned long *p;
41
42 p = per_cpu(*ptr, cpu);
43 if (p) {
44 per_cpu(*ptr, cpu) = NULL;
45 vfree(p);
46 }
47}
48
49static void free_sdei_stacks(void)
50{
51 int cpu;
52
53 for_each_possible_cpu(cpu) {
54 _free_sdei_stack(&sdei_stack_normal_ptr, cpu);
55 _free_sdei_stack(&sdei_stack_critical_ptr, cpu);
56 }
57}
58
59static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu)
60{
61 unsigned long *p;
62
63 p = arch_alloc_vmap_stack(SDEI_STACK_SIZE, cpu_to_node(cpu));
64 if (!p)
65 return -ENOMEM;
66 per_cpu(*ptr, cpu) = p;
67
68 return 0;
69}
70
71static int init_sdei_stacks(void)
72{
73 int cpu;
74 int err = 0;
75
76 for_each_possible_cpu(cpu) {
77 err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
78 if (err)
79 break;
80 err = _init_sdei_stack(&sdei_stack_critical_ptr, cpu);
81 if (err)
82 break;
83 }
84
85 if (err)
86 free_sdei_stacks();
87
88 return err;
89}
90
91bool _on_sdei_stack(unsigned long sp)
92{
93 unsigned long low, high;
94
95 if (!IS_ENABLED(CONFIG_VMAP_STACK))
96 return false;
97
98 low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
99 high = low + SDEI_STACK_SIZE;
100
101 if (low <= sp && sp < high)
102 return true;
103
104 low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
105 high = low + SDEI_STACK_SIZE;
106
107 return (low <= sp && sp < high);
108}
109
110unsigned long sdei_arch_get_entry_point(int conduit)
111{
112 /*
113 * SDEI works between adjacent exception levels. If we booted at EL1 we
114 * assume a hypervisor is marshalling events. If we booted at EL2 and
115 * dropped to EL1 because we don't support VHE, then we can't support
116 * SDEI.
117 */
118 if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
119 pr_err("Not supported on this hardware/boot configuration\n");
120 return 0;
121 }
122
123 if (IS_ENABLED(CONFIG_VMAP_STACK)) {
124 if (init_sdei_stacks())
125 return 0;
126 }
127
128 sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
129
130#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
131 if (arm64_kernel_unmapped_at_el0()) {
132 unsigned long offset;
133
134 offset = (unsigned long)__sdei_asm_entry_trampoline -
135 (unsigned long)__entry_tramp_text_start;
136 return TRAMP_VALIAS + offset;
137 } else
138#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
139 return (unsigned long)__sdei_asm_handler;
140
141}
142
143/*
144 * __sdei_handler() returns one of:
145 * SDEI_EV_HANDLED - success, return to the interrupted context.
146 * SDEI_EV_FAILED - failure, return this error code to firmare.
147 * virtual-address - success, return to this address.
148 */
149static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
150 struct sdei_registered_event *arg)
151{
152 u32 mode;
153 int i, err = 0;
154 int clobbered_registers = 4;
155 u64 elr = read_sysreg(elr_el1);
156 u32 kernel_mode = read_sysreg(CurrentEL) | 1; /* +SPSel */
157 unsigned long vbar = read_sysreg(vbar_el1);
158
159 if (arm64_kernel_unmapped_at_el0())
160 clobbered_registers++;
161
162 /* Retrieve the missing registers values */
163 for (i = 0; i < clobbered_registers; i++) {
164 /* from within the handler, this call always succeeds */
165 sdei_api_event_context(i, &regs->regs[i]);
166 }
167
168 /*
169 * We didn't take an exception to get here, set PAN. UAO will be cleared
170 * by sdei_event_handler()s set_fs(USER_DS) call.
171 */
172 __uaccess_enable_hw_pan();
173
174 err = sdei_event_handler(regs, arg);
175 if (err)
176 return SDEI_EV_FAILED;
177
178 if (elr != read_sysreg(elr_el1)) {
179 /*
180 * We took a synchronous exception from the SDEI handler.
181 * This could deadlock, and if you interrupt KVM it will
182 * hyp-panic instead.
183 */
184 pr_warn("unsafe: exception during handler\n");
185 }
186
187 mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);
188
189 /*
190 * If we interrupted the kernel with interrupts masked, we always go
191 * back to wherever we came from.
192 */
193 if (mode == kernel_mode && !interrupts_enabled(regs))
194 return SDEI_EV_HANDLED;
195
196 /*
197 * Otherwise, we pretend this was an IRQ. This lets user space tasks
198 * receive signals before we return to them, and KVM to invoke it's
199 * world switch to do the same.
200 *
201 * See DDI0487B.a Table D1-7 'Vector offsets from vector table base
202 * address'.
203 */
204 if (mode == kernel_mode)
205 return vbar + 0x280;
206 else if (mode & PSR_MODE32_BIT)
207 return vbar + 0x680;
208
209 return vbar + 0x480;
210}
211
212
213asmlinkage __kprobes notrace unsigned long
214__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
215{
216 unsigned long ret;
217 bool do_nmi_exit = false;
218
219 /*
220 * nmi_enter() deals with printk() re-entrance and use of RCU when
221 * RCU believed this CPU was idle. Because critical events can
222 * interrupt normal events, we may already be in_nmi().
223 */
224 if (!in_nmi()) {
225 nmi_enter();
226 do_nmi_exit = true;
227 }
228
229 ret = _sdei_handler(regs, arg);
230
231 if (do_nmi_exit)
232 nmi_exit();
233
234 return ret;
235}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index b120111a46be..f60c052e8d1c 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -178,7 +178,8 @@ static void __user *apply_user_offset(
178 178
179static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) 179static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
180{ 180{
181 struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; 181 struct user_fpsimd_state const *fpsimd =
182 &current->thread.fpsimd_state.user_fpsimd;
182 int err; 183 int err;
183 184
184 /* copy the FP and status/control registers */ 185 /* copy the FP and status/control registers */
@@ -195,7 +196,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
195 196
196static int restore_fpsimd_context(struct fpsimd_context __user *ctx) 197static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
197{ 198{
198 struct fpsimd_state fpsimd; 199 struct user_fpsimd_state fpsimd;
199 __u32 magic, size; 200 __u32 magic, size;
200 int err = 0; 201 int err = 0;
201 202
@@ -266,7 +267,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
266{ 267{
267 int err; 268 int err;
268 unsigned int vq; 269 unsigned int vq;
269 struct fpsimd_state fpsimd; 270 struct user_fpsimd_state fpsimd;
270 struct sve_context sve; 271 struct sve_context sve;
271 272
272 if (__copy_from_user(&sve, user->sve, sizeof(sve))) 273 if (__copy_from_user(&sve, user->sve, sizeof(sve)))
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 22711ee8e36c..a124140c0926 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -228,7 +228,8 @@ union __fpsimd_vreg {
228 228
229static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) 229static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
230{ 230{
231 struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; 231 struct user_fpsimd_state const *fpsimd =
232 &current->thread.fpsimd_state.user_fpsimd;
232 compat_ulong_t magic = VFP_MAGIC; 233 compat_ulong_t magic = VFP_MAGIC;
233 compat_ulong_t size = VFP_STORAGE_SIZE; 234 compat_ulong_t size = VFP_STORAGE_SIZE;
234 compat_ulong_t fpscr, fpexc; 235 compat_ulong_t fpscr, fpexc;
@@ -277,7 +278,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
277 278
278static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) 279static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
279{ 280{
280 struct fpsimd_state fpsimd; 281 struct user_fpsimd_state fpsimd;
281 compat_ulong_t magic = VFP_MAGIC; 282 compat_ulong_t magic = VFP_MAGIC;
282 compat_ulong_t size = VFP_STORAGE_SIZE; 283 compat_ulong_t size = VFP_STORAGE_SIZE;
283 compat_ulong_t fpscr; 284 compat_ulong_t fpscr;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 551eb07c53b6..3b8ad7be9c33 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/acpi.h> 20#include <linux/acpi.h>
21#include <linux/arm_sdei.h>
21#include <linux/delay.h> 22#include <linux/delay.h>
22#include <linux/init.h> 23#include <linux/init.h>
23#include <linux/spinlock.h> 24#include <linux/spinlock.h>
@@ -836,6 +837,7 @@ static void ipi_cpu_stop(unsigned int cpu)
836 set_cpu_online(cpu, false); 837 set_cpu_online(cpu, false);
837 838
838 local_daif_mask(); 839 local_daif_mask();
840 sdei_mask_local_cpu();
839 841
840 while (1) 842 while (1)
841 cpu_relax(); 843 cpu_relax();
@@ -853,6 +855,7 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
853 atomic_dec(&waiting_for_crash_ipi); 855 atomic_dec(&waiting_for_crash_ipi);
854 856
855 local_irq_disable(); 857 local_irq_disable();
858 sdei_mask_local_cpu();
856 859
857#ifdef CONFIG_HOTPLUG_CPU 860#ifdef CONFIG_HOTPLUG_CPU
858 if (cpu_ops[cpu]->cpu_die) 861 if (cpu_ops[cpu]->cpu_die)
@@ -972,6 +975,8 @@ void smp_send_stop(void)
972 if (num_online_cpus() > 1) 975 if (num_online_cpus() > 1)
973 pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", 976 pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
974 cpumask_pr_args(cpu_online_mask)); 977 cpumask_pr_args(cpu_online_mask));
978
979 sdei_mask_local_cpu();
975} 980}
976 981
977#ifdef CONFIG_KEXEC_CORE 982#ifdef CONFIG_KEXEC_CORE
@@ -990,8 +995,10 @@ void crash_smp_send_stop(void)
990 995
991 cpus_stopped = 1; 996 cpus_stopped = 1;
992 997
993 if (num_online_cpus() == 1) 998 if (num_online_cpus() == 1) {
999 sdei_mask_local_cpu();
994 return; 1000 return;
1001 }
995 1002
996 cpumask_copy(&mask, cpu_online_mask); 1003 cpumask_copy(&mask, cpu_online_mask);
997 cpumask_clear_cpu(smp_processor_id(), &mask); 1004 cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -1009,6 +1016,8 @@ void crash_smp_send_stop(void)
1009 if (atomic_read(&waiting_for_crash_ipi) > 0) 1016 if (atomic_read(&waiting_for_crash_ipi) > 0)
1010 pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", 1017 pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
1011 cpumask_pr_args(&mask)); 1018 cpumask_pr_args(&mask));
1019
1020 sdei_mask_local_cpu();
1012} 1021}
1013 1022
1014bool smp_crash_stop_failed(void) 1023bool smp_crash_stop_failed(void)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 3fe5ad884418..a307b9e13392 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -2,6 +2,7 @@
2#include <linux/ftrace.h> 2#include <linux/ftrace.h>
3#include <linux/percpu.h> 3#include <linux/percpu.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/uaccess.h>
5#include <asm/alternative.h> 6#include <asm/alternative.h>
6#include <asm/cacheflush.h> 7#include <asm/cacheflush.h>
7#include <asm/cpufeature.h> 8#include <asm/cpufeature.h>
@@ -51,8 +52,7 @@ void notrace __cpu_suspend_exit(void)
51 * PSTATE was not saved over suspend/resume, re-enable any detected 52 * PSTATE was not saved over suspend/resume, re-enable any detected
52 * features that might not have been set correctly. 53 * features that might not have been set correctly.
53 */ 54 */
54 asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, 55 __uaccess_enable_hw_pan();
55 CONFIG_ARM64_PAN));
56 uao_thread_switch(current); 56 uao_thread_switch(current);
57 57
58 /* 58 /*
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 8d48b233e6ce..21868530018e 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -37,18 +37,14 @@ static int __init get_cpu_for_node(struct device_node *node)
37 if (!cpu_node) 37 if (!cpu_node)
38 return -1; 38 return -1;
39 39
40 for_each_possible_cpu(cpu) { 40 cpu = of_cpu_node_to_id(cpu_node);
41 if (of_get_cpu_node(cpu, NULL) == cpu_node) { 41 if (cpu >= 0)
42 topology_parse_cpu_capacity(cpu_node, cpu); 42 topology_parse_cpu_capacity(cpu_node, cpu);
43 of_node_put(cpu_node); 43 else
44 return cpu; 44 pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
45 }
46 }
47
48 pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
49 45
50 of_node_put(cpu_node); 46 of_node_put(cpu_node);
51 return -1; 47 return cpu;
52} 48}
53 49
54static int __init parse_core(struct device_node *core, int cluster_id, 50static int __init parse_core(struct device_node *core, int cluster_id,
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 3d3588fcd1c7..bbb0fde2780e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -662,17 +662,58 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
662} 662}
663#endif 663#endif
664 664
665asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) 665void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
666{ 666{
667 nmi_enter();
668
669 console_verbose(); 667 console_verbose();
670 668
671 pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n", 669 pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
672 smp_processor_id(), esr, esr_get_class_string(esr)); 670 smp_processor_id(), esr, esr_get_class_string(esr));
673 __show_regs(regs); 671 if (regs)
672 __show_regs(regs);
673
674 nmi_panic(regs, "Asynchronous SError Interrupt");
675
676 cpu_park_loop();
677 unreachable();
678}
679
680bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
681{
682 u32 aet = arm64_ras_serror_get_severity(esr);
683
684 switch (aet) {
685 case ESR_ELx_AET_CE: /* corrected error */
686 case ESR_ELx_AET_UEO: /* restartable, not yet consumed */
687 /*
688 * The CPU can make progress. We may take UEO again as
689 * a more severe error.
690 */
691 return false;
692
693 case ESR_ELx_AET_UEU: /* Uncorrected Unrecoverable */
694 case ESR_ELx_AET_UER: /* Uncorrected Recoverable */
695 /*
696 * The CPU can't make progress. The exception may have
697 * been imprecise.
698 */
699 return true;
700
701 case ESR_ELx_AET_UC: /* Uncontainable or Uncategorized error */
702 default:
703 /* Error has been silently propagated */
704 arm64_serror_panic(regs, esr);
705 }
706}
707
708asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
709{
710 nmi_enter();
711
712 /* non-RAS errors are not containable */
713 if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
714 arm64_serror_panic(regs, esr);
674 715
675 panic("Asynchronous SError Interrupt"); 716 nmi_exit();
676} 717}
677 718
678void __pte_error(const char *file, int line, unsigned long val) 719void __pte_error(const char *file, int line, unsigned long val)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7da3e5c366a0..0221aca6493d 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -57,6 +57,17 @@ jiffies = jiffies_64;
57#define HIBERNATE_TEXT 57#define HIBERNATE_TEXT
58#endif 58#endif
59 59
60#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
61#define TRAMP_TEXT \
62 . = ALIGN(PAGE_SIZE); \
63 VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
64 *(.entry.tramp.text) \
65 . = ALIGN(PAGE_SIZE); \
66 VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
67#else
68#define TRAMP_TEXT
69#endif
70
60/* 71/*
61 * The size of the PE/COFF section that covers the kernel image, which 72 * The size of the PE/COFF section that covers the kernel image, which
62 * runs from stext to _edata, must be a round multiple of the PE/COFF 73 * runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -113,6 +124,7 @@ SECTIONS
113 HYPERVISOR_TEXT 124 HYPERVISOR_TEXT
114 IDMAP_TEXT 125 IDMAP_TEXT
115 HIBERNATE_TEXT 126 HIBERNATE_TEXT
127 TRAMP_TEXT
116 *(.fixup) 128 *(.fixup)
117 *(.gnu.warning) 129 *(.gnu.warning)
118 . = ALIGN(16); 130 . = ALIGN(16);
@@ -206,13 +218,19 @@ SECTIONS
206 . = ALIGN(PAGE_SIZE); 218 . = ALIGN(PAGE_SIZE);
207 idmap_pg_dir = .; 219 idmap_pg_dir = .;
208 . += IDMAP_DIR_SIZE; 220 . += IDMAP_DIR_SIZE;
209 swapper_pg_dir = .; 221
210 . += SWAPPER_DIR_SIZE; 222#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
223 tramp_pg_dir = .;
224 . += PAGE_SIZE;
225#endif
211 226
212#ifdef CONFIG_ARM64_SW_TTBR0_PAN 227#ifdef CONFIG_ARM64_SW_TTBR0_PAN
213 reserved_ttbr0 = .; 228 reserved_ttbr0 = .;
214 . += RESERVED_TTBR0_SIZE; 229 . += RESERVED_TTBR0_SIZE;
215#endif 230#endif
231 swapper_pg_dir = .;
232 . += SWAPPER_DIR_SIZE;
233 swapper_pg_end = .;
216 234
217 __pecoff_data_size = ABSOLUTE(. - __initdata_begin); 235 __pecoff_data_size = ABSOLUTE(. - __initdata_begin);
218 _end = .; 236 _end = .;
@@ -234,7 +252,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
234ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) 252ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
235 <= SZ_4K, "Hibernate exit text too big or misaligned") 253 <= SZ_4K, "Hibernate exit text too big or misaligned")
236#endif 254#endif
237 255#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
256ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
257 "Entry trampoline text too big")
258#endif
238/* 259/*
239 * If padding is applied before .head.text, virt<->phys conversions will fail. 260 * If padding is applied before .head.text, virt<->phys conversions will fail.
240 */ 261 */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e60494f1eef9..520b0dad3c62 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -23,18 +23,26 @@
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24 24
25#include <asm/esr.h> 25#include <asm/esr.h>
26#include <asm/exception.h>
26#include <asm/kvm_asm.h> 27#include <asm/kvm_asm.h>
27#include <asm/kvm_coproc.h> 28#include <asm/kvm_coproc.h>
28#include <asm/kvm_emulate.h> 29#include <asm/kvm_emulate.h>
29#include <asm/kvm_mmu.h> 30#include <asm/kvm_mmu.h>
30#include <asm/kvm_psci.h> 31#include <asm/kvm_psci.h>
31#include <asm/debug-monitors.h> 32#include <asm/debug-monitors.h>
33#include <asm/traps.h>
32 34
33#define CREATE_TRACE_POINTS 35#define CREATE_TRACE_POINTS
34#include "trace.h" 36#include "trace.h"
35 37
36typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); 38typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
37 39
40static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
41{
42 if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
43 kvm_inject_vabt(vcpu);
44}
45
38static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) 46static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
39{ 47{
40 int ret; 48 int ret;
@@ -242,7 +250,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
242 *vcpu_pc(vcpu) -= adj; 250 *vcpu_pc(vcpu) -= adj;
243 } 251 }
244 252
245 kvm_inject_vabt(vcpu);
246 return 1; 253 return 1;
247 } 254 }
248 255
@@ -252,7 +259,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
252 case ARM_EXCEPTION_IRQ: 259 case ARM_EXCEPTION_IRQ:
253 return 1; 260 return 1;
254 case ARM_EXCEPTION_EL1_SERROR: 261 case ARM_EXCEPTION_EL1_SERROR:
255 kvm_inject_vabt(vcpu);
256 /* We may still need to return for single-step */ 262 /* We may still need to return for single-step */
257 if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS) 263 if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
258 && kvm_arm_handle_step_debug(vcpu, run)) 264 && kvm_arm_handle_step_debug(vcpu, run))
@@ -275,3 +281,25 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
275 return 0; 281 return 0;
276 } 282 }
277} 283}
284
285/* For exit types that need handling before we can be preempted */
286void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
287 int exception_index)
288{
289 if (ARM_SERROR_PENDING(exception_index)) {
290 if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
291 u64 disr = kvm_vcpu_get_disr(vcpu);
292
293 kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
294 } else {
295 kvm_inject_vabt(vcpu);
296 }
297
298 return;
299 }
300
301 exception_index = ARM_EXCEPTION_CODE(exception_index);
302
303 if (exception_index == ARM_EXCEPTION_EL1_SERROR)
304 kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu));
305}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 870828c364c5..e086c6eff8c6 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -63,7 +63,8 @@ __do_hyp_init:
63 cmp x0, #HVC_STUB_HCALL_NR 63 cmp x0, #HVC_STUB_HCALL_NR
64 b.lo __kvm_handle_stub_hvc 64 b.lo __kvm_handle_stub_hvc
65 65
66 msr ttbr0_el2, x0 66 phys_to_ttbr x0, x4
67 msr ttbr0_el2, x4
67 68
68 mrs x4, tcr_el1 69 mrs x4, tcr_el1
69 ldr x5, =TCR_EL2_MASK 70 ldr x5, =TCR_EL2_MASK
@@ -71,30 +72,27 @@ __do_hyp_init:
71 mov x5, #TCR_EL2_RES1 72 mov x5, #TCR_EL2_RES1
72 orr x4, x4, x5 73 orr x4, x4, x5
73 74
74#ifndef CONFIG_ARM64_VA_BITS_48
75 /* 75 /*
76 * If we are running with VA_BITS < 48, we may be running with an extra 76 * The ID map may be configured to use an extended virtual address
77 * level of translation in the ID map. This is only the case if system 77 * range. This is only the case if system RAM is out of range for the
78 * RAM is out of range for the currently configured page size and number 78 * currently configured page size and VA_BITS, in which case we will
79 * of translation levels, in which case we will also need the extra 79 * also need the extended virtual range for the HYP ID map, or we won't
80 * level for the HYP ID map, or we won't be able to enable the EL2 MMU. 80 * be able to enable the EL2 MMU.
81 * 81 *
82 * However, at EL2, there is only one TTBR register, and we can't switch 82 * However, at EL2, there is only one TTBR register, and we can't switch
83 * between translation tables *and* update TCR_EL2.T0SZ at the same 83 * between translation tables *and* update TCR_EL2.T0SZ at the same
84 * time. Bottom line: we need the extra level in *both* our translation 84 * time. Bottom line: we need to use the extended range with *both* our
85 * tables. 85 * translation tables.
86 * 86 *
87 * So use the same T0SZ value we use for the ID map. 87 * So use the same T0SZ value we use for the ID map.
88 */ 88 */
89 ldr_l x5, idmap_t0sz 89 ldr_l x5, idmap_t0sz
90 bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH 90 bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
91#endif 91
92 /* 92 /*
93 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in 93 * Set the PS bits in TCR_EL2.
94 * TCR_EL2.
95 */ 94 */
96 mrs x5, ID_AA64MMFR0_EL1 95 tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6
97 bfi x4, x5, #16, #3
98 96
99 msr tcr_el2, x4 97 msr tcr_el2, x4
100 98
@@ -122,6 +120,10 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
122 kern_hyp_va x2 120 kern_hyp_va x2
123 msr vbar_el2, x2 121 msr vbar_el2, x2
124 122
123 /* copy tpidr_el1 into tpidr_el2 for use by HYP */
124 mrs x1, tpidr_el1
125 msr tpidr_el2, x1
126
125 /* Hello, World! */ 127 /* Hello, World! */
126 eret 128 eret
127ENDPROC(__kvm_hyp_init) 129ENDPROC(__kvm_hyp_init)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 12ee62d6d410..fdd1068ee3a5 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,8 +62,8 @@ ENTRY(__guest_enter)
62 // Store the host regs 62 // Store the host regs
63 save_callee_saved_regs x1 63 save_callee_saved_regs x1
64 64
65 // Store the host_ctxt for use at exit time 65 // Store host_ctxt and vcpu for use at exit time
66 str x1, [sp, #-16]! 66 stp x1, x0, [sp, #-16]!
67 67
68 add x18, x0, #VCPU_CONTEXT 68 add x18, x0, #VCPU_CONTEXT
69 69
@@ -124,6 +124,17 @@ ENTRY(__guest_exit)
124 // Now restore the host regs 124 // Now restore the host regs
125 restore_callee_saved_regs x2 125 restore_callee_saved_regs x2
126 126
127alternative_if ARM64_HAS_RAS_EXTN
128 // If we have the RAS extensions we can consume a pending error
129 // without an unmask-SError and isb.
130 esb
131 mrs_s x2, SYS_DISR_EL1
132 str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
133 cbz x2, 1f
134 msr_s SYS_DISR_EL1, xzr
135 orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
1361: ret
137alternative_else
127 // If we have a pending asynchronous abort, now is the 138 // If we have a pending asynchronous abort, now is the
128 // time to find out. From your VAXorcist book, page 666: 139 // time to find out. From your VAXorcist book, page 666:
129 // "Threaten me not, oh Evil one! For I speak with 140 // "Threaten me not, oh Evil one! For I speak with
@@ -134,7 +145,9 @@ ENTRY(__guest_exit)
134 mov x5, x0 145 mov x5, x0
135 146
136 dsb sy // Synchronize against in-flight ld/st 147 dsb sy // Synchronize against in-flight ld/st
148 nop
137 msr daifclr, #4 // Unmask aborts 149 msr daifclr, #4 // Unmask aborts
150alternative_endif
138 151
139 // This is our single instruction exception window. A pending 152 // This is our single instruction exception window. A pending
140 // SError is guaranteed to occur at the earliest when we unmask 153 // SError is guaranteed to occur at the earliest when we unmask
@@ -159,6 +172,10 @@ abort_guest_exit_end:
159ENDPROC(__guest_exit) 172ENDPROC(__guest_exit)
160 173
161ENTRY(__fpsimd_guest_restore) 174ENTRY(__fpsimd_guest_restore)
175 // x0: esr
176 // x1: vcpu
177 // x2-x29,lr: vcpu regs
178 // vcpu x0-x1 on the stack
162 stp x2, x3, [sp, #-16]! 179 stp x2, x3, [sp, #-16]!
163 stp x4, lr, [sp, #-16]! 180 stp x4, lr, [sp, #-16]!
164 181
@@ -173,7 +190,7 @@ alternative_else
173alternative_endif 190alternative_endif
174 isb 191 isb
175 192
176 mrs x3, tpidr_el2 193 mov x3, x1
177 194
178 ldr x0, [x3, #VCPU_HOST_CONTEXT] 195 ldr x0, [x3, #VCPU_HOST_CONTEXT]
179 kern_hyp_va x0 196 kern_hyp_va x0
@@ -196,3 +213,15 @@ alternative_endif
196 213
197 eret 214 eret
198ENDPROC(__fpsimd_guest_restore) 215ENDPROC(__fpsimd_guest_restore)
216
217ENTRY(__qcom_hyp_sanitize_btac_predictors)
218 /**
219 * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700)
220 * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls
221 * b15-b0: contains SiP functionID
222 */
223 movz x0, #0x1700
224 movk x0, #0xc200, lsl #16
225 smc #0
226 ret
227ENDPROC(__qcom_hyp_sanitize_btac_predictors)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5170ce1021da..e4f37b9dd47c 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -104,6 +104,7 @@ el1_trap:
104 /* 104 /*
105 * x0: ESR_EC 105 * x0: ESR_EC
106 */ 106 */
107 ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
107 108
108 /* 109 /*
109 * We trap the first access to the FP/SIMD to save the host context 110 * We trap the first access to the FP/SIMD to save the host context
@@ -116,19 +117,18 @@ alternative_if_not ARM64_HAS_NO_FPSIMD
116 b.eq __fpsimd_guest_restore 117 b.eq __fpsimd_guest_restore
117alternative_else_nop_endif 118alternative_else_nop_endif
118 119
119 mrs x1, tpidr_el2
120 mov x0, #ARM_EXCEPTION_TRAP 120 mov x0, #ARM_EXCEPTION_TRAP
121 b __guest_exit 121 b __guest_exit
122 122
123el1_irq: 123el1_irq:
124 stp x0, x1, [sp, #-16]! 124 stp x0, x1, [sp, #-16]!
125 mrs x1, tpidr_el2 125 ldr x1, [sp, #16 + 8]
126 mov x0, #ARM_EXCEPTION_IRQ 126 mov x0, #ARM_EXCEPTION_IRQ
127 b __guest_exit 127 b __guest_exit
128 128
129el1_error: 129el1_error:
130 stp x0, x1, [sp, #-16]! 130 stp x0, x1, [sp, #-16]!
131 mrs x1, tpidr_el2 131 ldr x1, [sp, #16 + 8]
132 mov x0, #ARM_EXCEPTION_EL1_SERROR 132 mov x0, #ARM_EXCEPTION_EL1_SERROR
133 b __guest_exit 133 b __guest_exit
134 134
@@ -163,6 +163,18 @@ ENTRY(__hyp_do_panic)
163 eret 163 eret
164ENDPROC(__hyp_do_panic) 164ENDPROC(__hyp_do_panic)
165 165
166ENTRY(__hyp_panic)
167 /*
168 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
169 * not be accessible by this address from EL2, hyp_panic() converts
170 * it with kern_hyp_va() before use.
171 */
172 ldr x0, =kvm_host_cpu_state
173 mrs x1, tpidr_el2
174 add x0, x0, x1
175 b hyp_panic
176ENDPROC(__hyp_panic)
177
166.macro invalid_vector label, target = __hyp_panic 178.macro invalid_vector label, target = __hyp_panic
167 .align 2 179 .align 2
168\label: 180\label:
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index a81f5e10fc8c..603e1ee83e89 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -32,6 +32,8 @@ u32 __hyp_text __init_stage2_translation(void)
32 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... 32 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
33 */ 33 */
34 parange = read_sysreg(id_aa64mmfr0_el1) & 7; 34 parange = read_sysreg(id_aa64mmfr0_el1) & 7;
35 if (parange > ID_AA64MMFR0_PARANGE_MAX)
36 parange = ID_AA64MMFR0_PARANGE_MAX;
35 val |= parange << 16; 37 val |= parange << 16;
36 38
37 /* Compute the actual PARange... */ 39 /* Compute the actual PARange... */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f7c651f3a8c0..036e1f3d77a6 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -17,6 +17,7 @@
17 17
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/jump_label.h> 19#include <linux/jump_label.h>
20#include <uapi/linux/psci.h>
20 21
21#include <asm/kvm_asm.h> 22#include <asm/kvm_asm.h>
22#include <asm/kvm_emulate.h> 23#include <asm/kvm_emulate.h>
@@ -52,7 +53,7 @@ static void __hyp_text __activate_traps_vhe(void)
52 val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); 53 val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
53 write_sysreg(val, cpacr_el1); 54 write_sysreg(val, cpacr_el1);
54 55
55 write_sysreg(__kvm_hyp_vector, vbar_el1); 56 write_sysreg(kvm_get_hyp_vector(), vbar_el1);
56} 57}
57 58
58static void __hyp_text __activate_traps_nvhe(void) 59static void __hyp_text __activate_traps_nvhe(void)
@@ -93,6 +94,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
93 94
94 write_sysreg(val, hcr_el2); 95 write_sysreg(val, hcr_el2);
95 96
97 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
98 write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
99
96 /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ 100 /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
97 write_sysreg(1 << 15, hstr_el2); 101 write_sysreg(1 << 15, hstr_el2);
98 /* 102 /*
@@ -235,11 +239,12 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
235 239
236static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) 240static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
237{ 241{
238 u64 esr = read_sysreg_el2(esr); 242 u8 ec;
239 u8 ec = ESR_ELx_EC(esr); 243 u64 esr;
240 u64 hpfar, far; 244 u64 hpfar, far;
241 245
242 vcpu->arch.fault.esr_el2 = esr; 246 esr = vcpu->arch.fault.esr_el2;
247 ec = ESR_ELx_EC(esr);
243 248
244 if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) 249 if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
245 return true; 250 return true;
@@ -305,9 +310,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
305 u64 exit_code; 310 u64 exit_code;
306 311
307 vcpu = kern_hyp_va(vcpu); 312 vcpu = kern_hyp_va(vcpu);
308 write_sysreg(vcpu, tpidr_el2);
309 313
310 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); 314 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
315 host_ctxt->__hyp_running_vcpu = vcpu;
311 guest_ctxt = &vcpu->arch.ctxt; 316 guest_ctxt = &vcpu->arch.ctxt;
312 317
313 __sysreg_save_host_state(host_ctxt); 318 __sysreg_save_host_state(host_ctxt);
@@ -332,6 +337,8 @@ again:
332 exit_code = __guest_enter(vcpu, host_ctxt); 337 exit_code = __guest_enter(vcpu, host_ctxt);
333 /* And we're baaack! */ 338 /* And we're baaack! */
334 339
340 if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
341 vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
335 /* 342 /*
336 * We're using the raw exception code in order to only process 343 * We're using the raw exception code in order to only process
337 * the trap if no SError is pending. We will come back to the 344 * the trap if no SError is pending. We will come back to the
@@ -341,6 +348,18 @@ again:
341 if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) 348 if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
342 goto again; 349 goto again;
343 350
351 if (exit_code == ARM_EXCEPTION_TRAP &&
352 (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 ||
353 kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) &&
354 vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) {
355 u64 val = PSCI_RET_NOT_SUPPORTED;
356 if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
357 val = 2;
358
359 vcpu_set_reg(vcpu, 0, val);
360 goto again;
361 }
362
344 if (static_branch_unlikely(&vgic_v2_cpuif_trap) && 363 if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
345 exit_code == ARM_EXCEPTION_TRAP) { 364 exit_code == ARM_EXCEPTION_TRAP) {
346 bool valid; 365 bool valid;
@@ -393,6 +412,14 @@ again:
393 /* 0 falls through to be handled out of EL2 */ 412 /* 0 falls through to be handled out of EL2 */
394 } 413 }
395 414
415 if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
416 u32 midr = read_cpuid_id();
417
418 /* Apply BTAC predictors mitigation to all Falkor chips */
419 if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
420 __qcom_hyp_sanitize_btac_predictors();
421 }
422
396 fp_enabled = __fpsimd_enabled(); 423 fp_enabled = __fpsimd_enabled();
397 424
398 __sysreg_save_guest_state(guest_ctxt); 425 __sysreg_save_guest_state(guest_ctxt);
@@ -422,7 +449,8 @@ again:
422 449
423static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; 450static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
424 451
425static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) 452static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
453 struct kvm_vcpu *vcpu)
426{ 454{
427 unsigned long str_va; 455 unsigned long str_va;
428 456
@@ -436,35 +464,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
436 __hyp_do_panic(str_va, 464 __hyp_do_panic(str_va,
437 spsr, elr, 465 spsr, elr,
438 read_sysreg(esr_el2), read_sysreg_el2(far), 466 read_sysreg(esr_el2), read_sysreg_el2(far),
439 read_sysreg(hpfar_el2), par, 467 read_sysreg(hpfar_el2), par, vcpu);
440 (void *)read_sysreg(tpidr_el2));
441} 468}
442 469
443static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par) 470static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
471 struct kvm_vcpu *vcpu)
444{ 472{
445 panic(__hyp_panic_string, 473 panic(__hyp_panic_string,
446 spsr, elr, 474 spsr, elr,
447 read_sysreg_el2(esr), read_sysreg_el2(far), 475 read_sysreg_el2(esr), read_sysreg_el2(far),
448 read_sysreg(hpfar_el2), par, 476 read_sysreg(hpfar_el2), par, vcpu);
449 (void *)read_sysreg(tpidr_el2));
450} 477}
451 478
452static hyp_alternate_select(__hyp_call_panic, 479static hyp_alternate_select(__hyp_call_panic,
453 __hyp_call_panic_nvhe, __hyp_call_panic_vhe, 480 __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
454 ARM64_HAS_VIRT_HOST_EXTN); 481 ARM64_HAS_VIRT_HOST_EXTN);
455 482
456void __hyp_text __noreturn __hyp_panic(void) 483void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
457{ 484{
485 struct kvm_vcpu *vcpu = NULL;
486
458 u64 spsr = read_sysreg_el2(spsr); 487 u64 spsr = read_sysreg_el2(spsr);
459 u64 elr = read_sysreg_el2(elr); 488 u64 elr = read_sysreg_el2(elr);
460 u64 par = read_sysreg(par_el1); 489 u64 par = read_sysreg(par_el1);
461 490
462 if (read_sysreg(vttbr_el2)) { 491 if (read_sysreg(vttbr_el2)) {
463 struct kvm_vcpu *vcpu;
464 struct kvm_cpu_context *host_ctxt; 492 struct kvm_cpu_context *host_ctxt;
465 493
466 vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); 494 host_ctxt = kern_hyp_va(__host_ctxt);
467 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); 495 vcpu = host_ctxt->__hyp_running_vcpu;
468 __timer_disable_traps(vcpu); 496 __timer_disable_traps(vcpu);
469 __deactivate_traps(vcpu); 497 __deactivate_traps(vcpu);
470 __deactivate_vm(vcpu); 498 __deactivate_vm(vcpu);
@@ -472,7 +500,7 @@ void __hyp_text __noreturn __hyp_panic(void)
472 } 500 }
473 501
474 /* Call panic for real */ 502 /* Call panic for real */
475 __hyp_call_panic()(spsr, elr, par); 503 __hyp_call_panic()(spsr, elr, par, vcpu);
476 504
477 unreachable(); 505 unreachable();
478} 506}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 934137647837..2c17afd2be96 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
27/* 27/*
28 * Non-VHE: Both host and guest must save everything. 28 * Non-VHE: Both host and guest must save everything.
29 * 29 *
30 * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc, 30 * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
31 * pstate, and guest must save everything. 31 * and guest must save everything.
32 */ 32 */
33 33
34static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) 34static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
36 ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); 36 ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
37 ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); 37 ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
38 ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); 38 ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
39 ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
40 ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); 39 ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
41 ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); 40 ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
42 ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
43 ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
44} 41}
45 42
46static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) 43static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
@@ -62,10 +59,16 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
62 ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); 59 ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
63 ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); 60 ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
64 ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); 61 ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
62 ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
65 63
66 ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); 64 ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
67 ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); 65 ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
68 ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); 66 ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
67 ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
68 ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
69
70 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
71 ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
69} 72}
70 73
71static hyp_alternate_select(__sysreg_call_save_host_state, 74static hyp_alternate_select(__sysreg_call_save_host_state,
@@ -89,11 +92,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
89 write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); 92 write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
90 write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); 93 write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
91 write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); 94 write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
92 write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
93 write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); 95 write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
94 write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); 96 write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
95 write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
96 write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
97} 97}
98 98
99static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) 99static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
@@ -115,10 +115,16 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
115 write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); 115 write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
116 write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); 116 write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
117 write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); 117 write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
118 write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
118 119
119 write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); 120 write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
120 write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); 121 write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
121 write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); 122 write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
123 write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
124 write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
125
126 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
127 write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
122} 128}
123 129
124static hyp_alternate_select(__sysreg_call_restore_host_state, 130static hyp_alternate_select(__sysreg_call_restore_host_state,
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 8ecbcb40e317..60666a056944 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -164,14 +164,25 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
164 inject_undef64(vcpu); 164 inject_undef64(vcpu);
165} 165}
166 166
167static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
168{
169 vcpu_set_vsesr(vcpu, esr);
170 vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
171}
172
167/** 173/**
168 * kvm_inject_vabt - inject an async abort / SError into the guest 174 * kvm_inject_vabt - inject an async abort / SError into the guest
169 * @vcpu: The VCPU to receive the exception 175 * @vcpu: The VCPU to receive the exception
170 * 176 *
171 * It is assumed that this code is called from the VCPU thread and that the 177 * It is assumed that this code is called from the VCPU thread and that the
172 * VCPU therefore is not currently executing guest code. 178 * VCPU therefore is not currently executing guest code.
179 *
180 * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
181 * the remaining ISS all-zeros so that this error is not interpreted as an
182 * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
183 * value, so the CPU generates an imp-def value.
173 */ 184 */
174void kvm_inject_vabt(struct kvm_vcpu *vcpu) 185void kvm_inject_vabt(struct kvm_vcpu *vcpu)
175{ 186{
176 vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); 187 pend_guest_serror(vcpu, ESR_ELx_ISV);
177} 188}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1830ebc227d1..50a43c7b97ca 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1159,6 +1159,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
1159 { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, 1159 { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
1160 { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, 1160 { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
1161 { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, 1161 { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
1162
1163 { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi },
1164 { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi },
1165 { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi },
1166 { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi },
1167 { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi },
1168 { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
1169 { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
1170 { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
1171
1162 { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, 1172 { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
1163 { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, 1173 { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
1164 1174
@@ -1169,6 +1179,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
1169 { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, 1179 { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
1170 1180
1171 { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, 1181 { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
1182 { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
1172 1183
1173 { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only }, 1184 { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
1174 { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only }, 1185 { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index e88fb99c1561..3d69a8d41fa5 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -30,7 +30,7 @@
30 * Alignment fixed up by hardware. 30 * Alignment fixed up by hardware.
31 */ 31 */
32ENTRY(__clear_user) 32ENTRY(__clear_user)
33 uaccess_enable_not_uao x2, x3 33 uaccess_enable_not_uao x2, x3, x4
34 mov x2, x1 // save the size for fixup return 34 mov x2, x1 // save the size for fixup return
35 subs x1, x1, #8 35 subs x1, x1, #8
36 b.mi 2f 36 b.mi 2f
@@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
50 b.mi 5f 50 b.mi 5f
51uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 51uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
525: mov x0, #0 525: mov x0, #0
53 uaccess_disable_not_uao x2 53 uaccess_disable_not_uao x2, x3
54 ret 54 ret
55ENDPROC(__clear_user) 55ENDPROC(__clear_user)
56 56
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4b5d826895ff..20305d485046 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -64,10 +64,10 @@
64 64
65end .req x5 65end .req x5
66ENTRY(__arch_copy_from_user) 66ENTRY(__arch_copy_from_user)
67 uaccess_enable_not_uao x3, x4 67 uaccess_enable_not_uao x3, x4, x5
68 add end, x0, x2 68 add end, x0, x2
69#include "copy_template.S" 69#include "copy_template.S"
70 uaccess_disable_not_uao x3 70 uaccess_disable_not_uao x3, x4
71 mov x0, #0 // Nothing to copy 71 mov x0, #0 // Nothing to copy
72 ret 72 ret
73ENDPROC(__arch_copy_from_user) 73ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index b24a830419ad..fbb090f431a5 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -65,10 +65,10 @@
65 65
66end .req x5 66end .req x5
67ENTRY(raw_copy_in_user) 67ENTRY(raw_copy_in_user)
68 uaccess_enable_not_uao x3, x4 68 uaccess_enable_not_uao x3, x4, x5
69 add end, x0, x2 69 add end, x0, x2
70#include "copy_template.S" 70#include "copy_template.S"
71 uaccess_disable_not_uao x3 71 uaccess_disable_not_uao x3, x4
72 mov x0, #0 72 mov x0, #0
73 ret 73 ret
74ENDPROC(raw_copy_in_user) 74ENDPROC(raw_copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 351f0766f7a6..fda6172d6b88 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -63,10 +63,10 @@
63 63
64end .req x5 64end .req x5
65ENTRY(__arch_copy_to_user) 65ENTRY(__arch_copy_to_user)
66 uaccess_enable_not_uao x3, x4 66 uaccess_enable_not_uao x3, x4, x5
67 add end, x0, x2 67 add end, x0, x2
68#include "copy_template.S" 68#include "copy_template.S"
69 uaccess_disable_not_uao x3 69 uaccess_disable_not_uao x3, x4
70 mov x0, #0 70 mov x0, #0
71 ret 71 ret
72ENDPROC(__arch_copy_to_user) 72ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index 0179a43cc045..d3db9b2cd479 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -38,19 +38,19 @@ ENTRY(__ashlti3)
38ENDPROC(__ashlti3) 38ENDPROC(__ashlti3)
39 39
40ENTRY(__ashrti3) 40ENTRY(__ashrti3)
41 cbz x2, 3f 41 cbz x2, 1f
42 mov x3, #64 42 mov x3, #64
43 sub x3, x3, x2 43 sub x3, x3, x2
44 cmp x3, #0 44 cmp x3, #0
45 b.le 4f 45 b.le 2f
46 lsr x0, x0, x2 46 lsr x0, x0, x2
47 lsl x3, x1, x3 47 lsl x3, x1, x3
48 asr x2, x1, x2 48 asr x2, x1, x2
49 orr x0, x0, x3 49 orr x0, x0, x3
50 mov x1, x2 50 mov x1, x2
513: 511:
52 ret 52 ret
534: 532:
54 neg w0, w3 54 neg w0, w3
55 asr x2, x1, #63 55 asr x2, x1, #63
56 asr x0, x1, x0 56 asr x0, x1, x0
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7f1dbe962cf5..91464e7f77cc 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -49,7 +49,7 @@ ENTRY(flush_icache_range)
49 * - end - virtual end address of region 49 * - end - virtual end address of region
50 */ 50 */
51ENTRY(__flush_cache_user_range) 51ENTRY(__flush_cache_user_range)
52 uaccess_ttbr0_enable x2, x3 52 uaccess_ttbr0_enable x2, x3, x4
53 dcache_line_size x2, x3 53 dcache_line_size x2, x3
54 sub x3, x2, #1 54 sub x3, x2, #1
55 bic x4, x0, x3 55 bic x4, x0, x3
@@ -72,7 +72,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
72 isb 72 isb
73 mov x0, #0 73 mov x0, #0
741: 741:
75 uaccess_ttbr0_disable x1 75 uaccess_ttbr0_disable x1, x2
76 ret 76 ret
779: 779:
78 mov x0, #-EFAULT 78 mov x0, #-EFAULT
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 6f4017046323..301417ae2ba8 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending;
39 39
40#define ASID_MASK (~GENMASK(asid_bits - 1, 0)) 40#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
41#define ASID_FIRST_VERSION (1UL << asid_bits) 41#define ASID_FIRST_VERSION (1UL << asid_bits)
42#define NUM_USER_ASIDS ASID_FIRST_VERSION 42
43#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
44#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
45#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
46#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK)
47#else
48#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
49#define asid2idx(asid) ((asid) & ~ASID_MASK)
50#define idx2asid(idx) asid2idx(idx)
51#endif
43 52
44/* Get the ASIDBits supported by the current CPU */ 53/* Get the ASIDBits supported by the current CPU */
45static u32 get_cpu_asid_bits(void) 54static u32 get_cpu_asid_bits(void)
@@ -79,13 +88,6 @@ void verify_cpu_asid_bits(void)
79 } 88 }
80} 89}
81 90
82static void set_reserved_asid_bits(void)
83{
84 if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) &&
85 cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
86 __set_bit(FALKOR_RESERVED_ASID, asid_map);
87}
88
89static void flush_context(unsigned int cpu) 91static void flush_context(unsigned int cpu)
90{ 92{
91 int i; 93 int i;
@@ -94,8 +96,6 @@ static void flush_context(unsigned int cpu)
94 /* Update the list of reserved ASIDs and the ASID bitmap. */ 96 /* Update the list of reserved ASIDs and the ASID bitmap. */
95 bitmap_clear(asid_map, 0, NUM_USER_ASIDS); 97 bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
96 98
97 set_reserved_asid_bits();
98
99 for_each_possible_cpu(i) { 99 for_each_possible_cpu(i) {
100 asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); 100 asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
101 /* 101 /*
@@ -107,7 +107,7 @@ static void flush_context(unsigned int cpu)
107 */ 107 */
108 if (asid == 0) 108 if (asid == 0)
109 asid = per_cpu(reserved_asids, i); 109 asid = per_cpu(reserved_asids, i);
110 __set_bit(asid & ~ASID_MASK, asid_map); 110 __set_bit(asid2idx(asid), asid_map);
111 per_cpu(reserved_asids, i) = asid; 111 per_cpu(reserved_asids, i) = asid;
112 } 112 }
113 113
@@ -162,16 +162,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
162 * We had a valid ASID in a previous life, so try to re-use 162 * We had a valid ASID in a previous life, so try to re-use
163 * it if possible. 163 * it if possible.
164 */ 164 */
165 asid &= ~ASID_MASK; 165 if (!__test_and_set_bit(asid2idx(asid), asid_map))
166 if (!__test_and_set_bit(asid, asid_map))
167 return newasid; 166 return newasid;
168 } 167 }
169 168
170 /* 169 /*
171 * Allocate a free ASID. If we can't find one, take a note of the 170 * Allocate a free ASID. If we can't find one, take a note of the
172 * currently active ASIDs and mark the TLBs as requiring flushes. 171 * currently active ASIDs and mark the TLBs as requiring flushes. We
173 * We always count from ASID #1, as we use ASID #0 when setting a 172 * always count from ASID #2 (index 1), as we use ASID #0 when setting
174 * reserved TTBR0 for the init_mm. 173 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
174 * pairs.
175 */ 175 */
176 asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); 176 asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
177 if (asid != NUM_USER_ASIDS) 177 if (asid != NUM_USER_ASIDS)
@@ -188,32 +188,35 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
188set_asid: 188set_asid:
189 __set_bit(asid, asid_map); 189 __set_bit(asid, asid_map);
190 cur_idx = asid; 190 cur_idx = asid;
191 return asid | generation; 191 return idx2asid(asid) | generation;
192} 192}
193 193
194void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) 194void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
195{ 195{
196 unsigned long flags; 196 unsigned long flags;
197 u64 asid; 197 u64 asid, old_active_asid;
198 198
199 asid = atomic64_read(&mm->context.id); 199 asid = atomic64_read(&mm->context.id);
200 200
201 /* 201 /*
202 * The memory ordering here is subtle. 202 * The memory ordering here is subtle.
203 * If our ASID matches the current generation, then we update 203 * If our active_asids is non-zero and the ASID matches the current
204 * our active_asids entry with a relaxed xchg. Racing with a 204 * generation, then we update the active_asids entry with a relaxed
205 * concurrent rollover means that either: 205 * cmpxchg. Racing with a concurrent rollover means that either:
206 * 206 *
207 * - We get a zero back from the xchg and end up waiting on the 207 * - We get a zero back from the cmpxchg and end up waiting on the
208 * lock. Taking the lock synchronises with the rollover and so 208 * lock. Taking the lock synchronises with the rollover and so
209 * we are forced to see the updated generation. 209 * we are forced to see the updated generation.
210 * 210 *
211 * - We get a valid ASID back from the xchg, which means the 211 * - We get a valid ASID back from the cmpxchg, which means the
212 * relaxed xchg in flush_context will treat us as reserved 212 * relaxed xchg in flush_context will treat us as reserved
213 * because atomic RmWs are totally ordered for a given location. 213 * because atomic RmWs are totally ordered for a given location.
214 */ 214 */
215 if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) 215 old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
216 && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) 216 if (old_active_asid &&
217 !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
218 atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
219 old_active_asid, asid))
217 goto switch_mm_fastpath; 220 goto switch_mm_fastpath;
218 221
219 raw_spin_lock_irqsave(&cpu_asid_lock, flags); 222 raw_spin_lock_irqsave(&cpu_asid_lock, flags);
@@ -231,6 +234,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
231 raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); 234 raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
232 235
233switch_mm_fastpath: 236switch_mm_fastpath:
237
238 arm64_apply_bp_hardening();
239
234 /* 240 /*
235 * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when 241 * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
236 * emulating PAN. 242 * emulating PAN.
@@ -239,6 +245,15 @@ switch_mm_fastpath:
239 cpu_switch_mm(mm->pgd, mm); 245 cpu_switch_mm(mm->pgd, mm);
240} 246}
241 247
248/* Errata workaround post TTBRx_EL1 update. */
249asmlinkage void post_ttbr_update_workaround(void)
250{
251 asm(ALTERNATIVE("nop; nop; nop",
252 "ic iallu; dsb nsh; isb",
253 ARM64_WORKAROUND_CAVIUM_27456,
254 CONFIG_CAVIUM_ERRATUM_27456));
255}
256
242static int asids_init(void) 257static int asids_init(void)
243{ 258{
244 asid_bits = get_cpu_asid_bits(); 259 asid_bits = get_cpu_asid_bits();
@@ -254,8 +269,6 @@ static int asids_init(void)
254 panic("Failed to allocate bitmap for %lu ASIDs\n", 269 panic("Failed to allocate bitmap for %lu ASIDs\n",
255 NUM_USER_ASIDS); 270 NUM_USER_ASIDS);
256 271
257 set_reserved_asid_bits();
258
259 pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); 272 pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
260 return 0; 273 return 0;
261} 274}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9b7f89df49db..6c30cf92f492 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -707,6 +707,23 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
707 arm64_notify_die("", regs, &info, esr); 707 arm64_notify_die("", regs, &info, esr);
708} 708}
709 709
710asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
711 unsigned int esr,
712 struct pt_regs *regs)
713{
714 /*
715 * We've taken an instruction abort from userspace and not yet
716 * re-enabled IRQs. If the address is a kernel address, apply
717 * BP hardening prior to enabling IRQs and pre-emption.
718 */
719 if (addr > TASK_SIZE)
720 arm64_apply_bp_hardening();
721
722 local_irq_enable();
723 do_mem_abort(addr, esr, regs);
724}
725
726
710asmlinkage void __exception do_sp_pc_abort(unsigned long addr, 727asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
711 unsigned int esr, 728 unsigned int esr,
712 struct pt_regs *regs) 729 struct pt_regs *regs)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 00e7b900ca41..c903f7ccbdd2 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -366,6 +366,9 @@ void __init arm64_memblock_init(void)
366 /* Handle linux,usable-memory-range property */ 366 /* Handle linux,usable-memory-range property */
367 fdt_enforce_memory_region(); 367 fdt_enforce_memory_region();
368 368
369 /* Remove memory above our supported physical address size */
370 memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
371
369 /* 372 /*
370 * Ensure that the linear region takes up exactly half of the kernel 373 * Ensure that the linear region takes up exactly half of the kernel
371 * virtual address space. This way, we can distinguish a linear address 374 * virtual address space. This way, we can distinguish a linear address
@@ -600,49 +603,6 @@ void __init mem_init(void)
600 603
601 mem_init_print_info(NULL); 604 mem_init_print_info(NULL);
602 605
603#define MLK(b, t) b, t, ((t) - (b)) >> 10
604#define MLM(b, t) b, t, ((t) - (b)) >> 20
605#define MLG(b, t) b, t, ((t) - (b)) >> 30
606#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
607
608 pr_notice("Virtual kernel memory layout:\n");
609#ifdef CONFIG_KASAN
610 pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n",
611 MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
612#endif
613 pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n",
614 MLM(MODULES_VADDR, MODULES_END));
615 pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n",
616 MLG(VMALLOC_START, VMALLOC_END));
617 pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n",
618 MLK_ROUNDUP(_text, _etext));
619 pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n",
620 MLK_ROUNDUP(__start_rodata, __init_begin));
621 pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n",
622 MLK_ROUNDUP(__init_begin, __init_end));
623 pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
624 MLK_ROUNDUP(_sdata, _edata));
625 pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n",
626 MLK_ROUNDUP(__bss_start, __bss_stop));
627 pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n",
628 MLK(FIXADDR_START, FIXADDR_TOP));
629 pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n",
630 MLM(PCI_IO_START, PCI_IO_END));
631#ifdef CONFIG_SPARSEMEM_VMEMMAP
632 pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n",
633 MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
634 pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n",
635 MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
636 (unsigned long)virt_to_page(high_memory)));
637#endif
638 pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
639 MLM(__phys_to_virt(memblock_start_of_DRAM()),
640 (unsigned long)high_memory));
641
642#undef MLK
643#undef MLM
644#undef MLK_ROUNDUP
645
646 /* 606 /*
647 * Check boundaries twice: Some fundamental inconsistencies can be 607 * Check boundaries twice: Some fundamental inconsistencies can be
648 * detected at build time already. 608 * detected at build time already.
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 267d2b79d52d..b44992ec9643 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -50,6 +50,7 @@
50#define NO_CONT_MAPPINGS BIT(1) 50#define NO_CONT_MAPPINGS BIT(1)
51 51
52u64 idmap_t0sz = TCR_T0SZ(VA_BITS); 52u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
53u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
53 54
54u64 kimage_voffset __ro_after_init; 55u64 kimage_voffset __ro_after_init;
55EXPORT_SYMBOL(kimage_voffset); 56EXPORT_SYMBOL(kimage_voffset);
@@ -525,6 +526,35 @@ static int __init parse_rodata(char *arg)
525} 526}
526early_param("rodata", parse_rodata); 527early_param("rodata", parse_rodata);
527 528
529#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
530static int __init map_entry_trampoline(void)
531{
532 pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
533 phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
534
535 /* The trampoline is always mapped and can therefore be global */
536 pgprot_val(prot) &= ~PTE_NG;
537
538 /* Map only the text into the trampoline page table */
539 memset(tramp_pg_dir, 0, PGD_SIZE);
540 __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
541 prot, pgd_pgtable_alloc, 0);
542
543 /* Map both the text and data into the kernel page table */
544 __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
545 if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
546 extern char __entry_tramp_data_start[];
547
548 __set_fixmap(FIX_ENTRY_TRAMP_DATA,
549 __pa_symbol(__entry_tramp_data_start),
550 PAGE_KERNEL_RO);
551 }
552
553 return 0;
554}
555core_initcall(map_entry_trampoline);
556#endif
557
528/* 558/*
529 * Create fine-grained mappings for the kernel. 559 * Create fine-grained mappings for the kernel.
530 */ 560 */
@@ -570,8 +600,8 @@ static void __init map_kernel(pgd_t *pgd)
570 * entry instead. 600 * entry instead.
571 */ 601 */
572 BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); 602 BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
573 set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), 603 pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
574 __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); 604 lm_alias(bm_pmd));
575 pud_clear_fixmap(); 605 pud_clear_fixmap();
576 } else { 606 } else {
577 BUG(); 607 BUG();
@@ -612,7 +642,8 @@ void __init paging_init(void)
612 * allocated with it. 642 * allocated with it.
613 */ 643 */
614 memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, 644 memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
615 SWAPPER_DIR_SIZE - PAGE_SIZE); 645 __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
646 - PAGE_SIZE);
616} 647}
617 648
618/* 649/*
@@ -686,7 +717,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
686 if (!p) 717 if (!p)
687 return -ENOMEM; 718 return -ENOMEM;
688 719
689 set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); 720 pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
690 } else 721 } else
691 vmemmap_verify((pte_t *)pmd, node, addr, next); 722 vmemmap_verify((pte_t *)pmd, node, addr, next);
692 } while (addr = next, addr != end); 723 } while (addr = next, addr != end);
@@ -879,15 +910,19 @@ int __init arch_ioremap_pmd_supported(void)
879 910
880int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) 911int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
881{ 912{
913 pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
914 pgprot_val(mk_sect_prot(prot)));
882 BUG_ON(phys & ~PUD_MASK); 915 BUG_ON(phys & ~PUD_MASK);
883 set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); 916 set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
884 return 1; 917 return 1;
885} 918}
886 919
887int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) 920int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
888{ 921{
922 pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
923 pgprot_val(mk_sect_prot(prot)));
889 BUG_ON(phys & ~PMD_MASK); 924 BUG_ON(phys & ~PMD_MASK);
890 set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); 925 set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
891 return 1; 926 return 1;
892} 927}
893 928
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 051e71ec3335..289f9113a27a 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -49,6 +49,14 @@ void __init pgd_cache_init(void)
49 if (PGD_SIZE == PAGE_SIZE) 49 if (PGD_SIZE == PAGE_SIZE)
50 return; 50 return;
51 51
52#ifdef CONFIG_ARM64_PA_BITS_52
53 /*
54 * With 52-bit physical addresses, the architecture requires the
55 * top-level table to be aligned to at least 64 bytes.
56 */
57 BUILD_BUG_ON(PGD_SIZE < 64);
58#endif
59
52 /* 60 /*
53 * Naturally aligned pgds required by the architecture. 61 * Naturally aligned pgds required by the architecture.
54 */ 62 */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 95233dfc4c39..9f177aac6390 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
70 mrs x8, mdscr_el1 70 mrs x8, mdscr_el1
71 mrs x9, oslsr_el1 71 mrs x9, oslsr_el1
72 mrs x10, sctlr_el1 72 mrs x10, sctlr_el1
73alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
73 mrs x11, tpidr_el1 74 mrs x11, tpidr_el1
75alternative_else
76 mrs x11, tpidr_el2
77alternative_endif
74 mrs x12, sp_el0 78 mrs x12, sp_el0
75 stp x2, x3, [x0] 79 stp x2, x3, [x0]
76 stp x4, xzr, [x0, #16] 80 stp x4, xzr, [x0, #16]
@@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
116 msr mdscr_el1, x10 120 msr mdscr_el1, x10
117 121
118 msr sctlr_el1, x12 122 msr sctlr_el1, x12
123alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
119 msr tpidr_el1, x13 124 msr tpidr_el1, x13
125alternative_else
126 msr tpidr_el2, x13
127alternative_endif
120 msr sp_el0, x14 128 msr sp_el0, x14
121 /* 129 /*
122 * Restore oslsr_el1 by writing oslar_el1 130 * Restore oslsr_el1 by writing oslar_el1
@@ -124,6 +132,11 @@ ENTRY(cpu_do_resume)
124 ubfx x11, x11, #1, #1 132 ubfx x11, x11, #1, #1
125 msr oslar_el1, x11 133 msr oslar_el1, x11
126 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 134 reset_pmuserenr_el0 x0 // Disable PMU access from EL0
135
136alternative_if ARM64_HAS_RAS_EXTN
137 msr_s SYS_DISR_EL1, xzr
138alternative_else_nop_endif
139
127 isb 140 isb
128 ret 141 ret
129ENDPROC(cpu_do_resume) 142ENDPROC(cpu_do_resume)
@@ -138,13 +151,18 @@ ENDPROC(cpu_do_resume)
138 * - pgd_phys - physical address of new TTB 151 * - pgd_phys - physical address of new TTB
139 */ 152 */
140ENTRY(cpu_do_switch_mm) 153ENTRY(cpu_do_switch_mm)
141 pre_ttbr0_update_workaround x0, x2, x3 154 mrs x2, ttbr1_el1
142 mmid x1, x1 // get mm->context.id 155 mmid x1, x1 // get mm->context.id
143 bfi x0, x1, #48, #16 // set the ASID 156 phys_to_ttbr x0, x3
144 msr ttbr0_el1, x0 // set TTBR0 157#ifdef CONFIG_ARM64_SW_TTBR0_PAN
158 bfi x3, x1, #48, #16 // set the ASID field in TTBR0
159#endif
160 bfi x2, x1, #48, #16 // set the ASID
161 msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
145 isb 162 isb
146 post_ttbr0_update_workaround 163 msr ttbr0_el1, x3 // now update TTBR0
147 ret 164 isb
165 b post_ttbr_update_workaround // Back to C code...
148ENDPROC(cpu_do_switch_mm) 166ENDPROC(cpu_do_switch_mm)
149 167
150 .pushsection ".idmap.text", "ax" 168 .pushsection ".idmap.text", "ax"
@@ -158,14 +176,16 @@ ENTRY(idmap_cpu_replace_ttbr1)
158 save_and_disable_daif flags=x2 176 save_and_disable_daif flags=x2
159 177
160 adrp x1, empty_zero_page 178 adrp x1, empty_zero_page
161 msr ttbr1_el1, x1 179 phys_to_ttbr x1, x3
180 msr ttbr1_el1, x3
162 isb 181 isb
163 182
164 tlbi vmalle1 183 tlbi vmalle1
165 dsb nsh 184 dsb nsh
166 isb 185 isb
167 186
168 msr ttbr1_el1, x0 187 phys_to_ttbr x0, x3
188 msr ttbr1_el1, x3
169 isb 189 isb
170 190
171 restore_daif x2 191 restore_daif x2
@@ -214,25 +234,19 @@ ENTRY(__cpu_setup)
214 /* 234 /*
215 * Prepare SCTLR 235 * Prepare SCTLR
216 */ 236 */
217 adr x5, crval 237 mov_q x0, SCTLR_EL1_SET
218 ldp w5, w6, [x5]
219 mrs x0, sctlr_el1
220 bic x0, x0, x5 // clear bits
221 orr x0, x0, x6 // set bits
222 /* 238 /*
223 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for 239 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
224 * both user and kernel. 240 * both user and kernel.
225 */ 241 */
226 ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ 242 ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
227 TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 243 TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
228 tcr_set_idmap_t0sz x10, x9 244 tcr_set_idmap_t0sz x10, x9
229 245
230 /* 246 /*
231 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in 247 * Set the IPS bits in TCR_EL1.
232 * TCR_EL1.
233 */ 248 */
234 mrs x9, ID_AA64MMFR0_EL1 249 tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
235 bfi x10, x9, #32, #3
236#ifdef CONFIG_ARM64_HW_AFDBM 250#ifdef CONFIG_ARM64_HW_AFDBM
237 /* 251 /*
238 * Hardware update of the Access and Dirty bits. 252 * Hardware update of the Access and Dirty bits.
@@ -249,21 +263,3 @@ ENTRY(__cpu_setup)
249 msr tcr_el1, x10 263 msr tcr_el1, x10
250 ret // return to head.S 264 ret // return to head.S
251ENDPROC(__cpu_setup) 265ENDPROC(__cpu_setup)
252
253 /*
254 * We set the desired value explicitly, including those of the
255 * reserved bits. The values of bits EE & E0E were set early in
256 * el2_setup, which are left untouched below.
257 *
258 * n n T
259 * U E WT T UD US IHBS
260 * CE0 XWHW CZ ME TEEA S
261 * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
262 * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved
263 * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings
264 */
265 .type crval, #object
266crval:
267 .word 0xfcffffff // clear
268 .word 0x34d5d91d // set
269 .popsection
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 401ceb71540c..c5f05c4a4d00 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -101,12 +101,12 @@ ENTRY(privcmd_call)
101 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation 101 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
102 * is enabled (it implies that hardware UAO and PAN disabled). 102 * is enabled (it implies that hardware UAO and PAN disabled).
103 */ 103 */
104 uaccess_ttbr0_enable x6, x7 104 uaccess_ttbr0_enable x6, x7, x8
105 hvc XEN_IMM 105 hvc XEN_IMM
106 106
107 /* 107 /*
108 * Disable userspace access from kernel once the hyp call completed. 108 * Disable userspace access from kernel once the hyp call completed.
109 */ 109 */
110 uaccess_ttbr0_disable x6 110 uaccess_ttbr0_disable x6, x7
111 ret 111 ret
112ENDPROC(privcmd_call); 112ENDPROC(privcmd_call);
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index fa87a055905e..e77f77caa0f3 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -48,6 +48,14 @@ config ARM_SCPI_POWER_DOMAIN
48 This enables support for the SCPI power domains which can be 48 This enables support for the SCPI power domains which can be
49 enabled or disabled via the SCP firmware 49 enabled or disabled via the SCP firmware
50 50
51config ARM_SDE_INTERFACE
52 bool "ARM Software Delegated Exception Interface (SDEI)"
53 depends on ARM64
54 help
55 The Software Delegated Exception Interface (SDEI) is an ARM
56 standard for registering callbacks from the platform firmware
57 into the OS. This is typically used to implement RAS notifications.
58
51config EDD 59config EDD
52 tristate "BIOS Enhanced Disk Drive calls determine boot disk" 60 tristate "BIOS Enhanced Disk Drive calls determine boot disk"
53 depends on X86 61 depends on X86
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index feaa890197f3..b248238ddc6a 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_ARM_PSCI_FW) += psci.o
6obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o 6obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o
7obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o 7obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o
8obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o 8obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o
9obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o
9obj-$(CONFIG_DMI) += dmi_scan.o 10obj-$(CONFIG_DMI) += dmi_scan.o
10obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o 11obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o
11obj-$(CONFIG_EDD) += edd.o 12obj-$(CONFIG_EDD) += edd.o
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
new file mode 100644
index 000000000000..1ea71640fdc2
--- /dev/null
+++ b/drivers/firmware/arm_sdei.c
@@ -0,0 +1,1092 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2017 Arm Ltd.
3#define pr_fmt(fmt) "sdei: " fmt
4
5#include <linux/acpi.h>
6#include <linux/arm_sdei.h>
7#include <linux/arm-smccc.h>
8#include <linux/atomic.h>
9#include <linux/bitops.h>
10#include <linux/compiler.h>
11#include <linux/cpuhotplug.h>
12#include <linux/cpu.h>
13#include <linux/cpu_pm.h>
14#include <linux/errno.h>
15#include <linux/hardirq.h>
16#include <linux/kernel.h>
17#include <linux/kprobes.h>
18#include <linux/kvm_host.h>
19#include <linux/list.h>
20#include <linux/mutex.h>
21#include <linux/notifier.h>
22#include <linux/of.h>
23#include <linux/of_platform.h>
24#include <linux/percpu.h>
25#include <linux/platform_device.h>
26#include <linux/pm.h>
27#include <linux/ptrace.h>
28#include <linux/preempt.h>
29#include <linux/reboot.h>
30#include <linux/slab.h>
31#include <linux/smp.h>
32#include <linux/spinlock.h>
33#include <linux/uaccess.h>
34
35/*
36 * The call to use to reach the firmware.
37 */
38static asmlinkage void (*sdei_firmware_call)(unsigned long function_id,
39 unsigned long arg0, unsigned long arg1,
40 unsigned long arg2, unsigned long arg3,
41 unsigned long arg4, struct arm_smccc_res *res);
42
43/* entry point from firmware to arch asm code */
44static unsigned long sdei_entry_point;
45
46struct sdei_event {
47 /* These three are protected by the sdei_list_lock */
48 struct list_head list;
49 bool reregister;
50 bool reenable;
51
52 u32 event_num;
53 u8 type;
54 u8 priority;
55
56 /* This pointer is handed to firmware as the event argument. */
57 union {
58 /* Shared events */
59 struct sdei_registered_event *registered;
60
61 /* CPU private events */
62 struct sdei_registered_event __percpu *private_registered;
63 };
64};
65
66/* Take the mutex for any API call or modification. Take the mutex first. */
67static DEFINE_MUTEX(sdei_events_lock);
68
69/* and then hold this when modifying the list */
70static DEFINE_SPINLOCK(sdei_list_lock);
71static LIST_HEAD(sdei_list);
72
73/* Private events are registered/enabled via IPI passing one of these */
74struct sdei_crosscall_args {
75 struct sdei_event *event;
76 atomic_t errors;
77 int first_error;
78};
79
80#define CROSSCALL_INIT(arg, event) (arg.event = event, \
81 arg.first_error = 0, \
82 atomic_set(&arg.errors, 0))
83
84static inline int sdei_do_cross_call(void *fn, struct sdei_event * event)
85{
86 struct sdei_crosscall_args arg;
87
88 CROSSCALL_INIT(arg, event);
89 on_each_cpu(fn, &arg, true);
90
91 return arg.first_error;
92}
93
94static inline void
95sdei_cross_call_return(struct sdei_crosscall_args *arg, int err)
96{
97 if (err && (atomic_inc_return(&arg->errors) == 1))
98 arg->first_error = err;
99}
100
101static int sdei_to_linux_errno(unsigned long sdei_err)
102{
103 switch (sdei_err) {
104 case SDEI_NOT_SUPPORTED:
105 return -EOPNOTSUPP;
106 case SDEI_INVALID_PARAMETERS:
107 return -EINVAL;
108 case SDEI_DENIED:
109 return -EPERM;
110 case SDEI_PENDING:
111 return -EINPROGRESS;
112 case SDEI_OUT_OF_RESOURCE:
113 return -ENOMEM;
114 }
115
116 /* Not an error value ... */
117 return sdei_err;
118}
119
120/*
121 * If x0 is any of these values, then the call failed, use sdei_to_linux_errno()
122 * to translate.
123 */
124static int sdei_is_err(struct arm_smccc_res *res)
125{
126 switch (res->a0) {
127 case SDEI_NOT_SUPPORTED:
128 case SDEI_INVALID_PARAMETERS:
129 case SDEI_DENIED:
130 case SDEI_PENDING:
131 case SDEI_OUT_OF_RESOURCE:
132 return true;
133 }
134
135 return false;
136}
137
138static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0,
139 unsigned long arg1, unsigned long arg2,
140 unsigned long arg3, unsigned long arg4,
141 u64 *result)
142{
143 int err = 0;
144 struct arm_smccc_res res;
145
146 if (sdei_firmware_call) {
147 sdei_firmware_call(function_id, arg0, arg1, arg2, arg3, arg4,
148 &res);
149 if (sdei_is_err(&res))
150 err = sdei_to_linux_errno(res.a0);
151 } else {
152 /*
153 * !sdei_firmware_call means we failed to probe or called
154 * sdei_mark_interface_broken(). -EIO is not an error returned
155 * by sdei_to_linux_errno() and is used to suppress messages
156 * from this driver.
157 */
158 err = -EIO;
159 res.a0 = SDEI_NOT_SUPPORTED;
160 }
161
162 if (result)
163 *result = res.a0;
164
165 return err;
166}
167
168static struct sdei_event *sdei_event_find(u32 event_num)
169{
170 struct sdei_event *e, *found = NULL;
171
172 lockdep_assert_held(&sdei_events_lock);
173
174 spin_lock(&sdei_list_lock);
175 list_for_each_entry(e, &sdei_list, list) {
176 if (e->event_num == event_num) {
177 found = e;
178 break;
179 }
180 }
181 spin_unlock(&sdei_list_lock);
182
183 return found;
184}
185
186int sdei_api_event_context(u32 query, u64 *result)
187{
188 return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_CONTEXT, query, 0, 0, 0, 0,
189 result);
190}
191NOKPROBE_SYMBOL(sdei_api_event_context);
192
193static int sdei_api_event_get_info(u32 event, u32 info, u64 *result)
194{
195 return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0,
196 0, 0, result);
197}
198
199static struct sdei_event *sdei_event_create(u32 event_num,
200 sdei_event_callback *cb,
201 void *cb_arg)
202{
203 int err;
204 u64 result;
205 struct sdei_event *event;
206 struct sdei_registered_event *reg;
207
208 lockdep_assert_held(&sdei_events_lock);
209
210 event = kzalloc(sizeof(*event), GFP_KERNEL);
211 if (!event)
212 return ERR_PTR(-ENOMEM);
213
214 INIT_LIST_HEAD(&event->list);
215 event->event_num = event_num;
216
217 err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY,
218 &result);
219 if (err) {
220 kfree(event);
221 return ERR_PTR(err);
222 }
223 event->priority = result;
224
225 err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_TYPE,
226 &result);
227 if (err) {
228 kfree(event);
229 return ERR_PTR(err);
230 }
231 event->type = result;
232
233 if (event->type == SDEI_EVENT_TYPE_SHARED) {
234 reg = kzalloc(sizeof(*reg), GFP_KERNEL);
235 if (!reg) {
236 kfree(event);
237 return ERR_PTR(-ENOMEM);
238 }
239
240 reg->event_num = event_num;
241 reg->priority = event->priority;
242
243 reg->callback = cb;
244 reg->callback_arg = cb_arg;
245 event->registered = reg;
246 } else {
247 int cpu;
248 struct sdei_registered_event __percpu *regs;
249
250 regs = alloc_percpu(struct sdei_registered_event);
251 if (!regs) {
252 kfree(event);
253 return ERR_PTR(-ENOMEM);
254 }
255
256 for_each_possible_cpu(cpu) {
257 reg = per_cpu_ptr(regs, cpu);
258
259 reg->event_num = event->event_num;
260 reg->priority = event->priority;
261 reg->callback = cb;
262 reg->callback_arg = cb_arg;
263 }
264
265 event->private_registered = regs;
266 }
267
268 if (sdei_event_find(event_num)) {
269 kfree(event->registered);
270 kfree(event);
271 event = ERR_PTR(-EBUSY);
272 } else {
273 spin_lock(&sdei_list_lock);
274 list_add(&event->list, &sdei_list);
275 spin_unlock(&sdei_list_lock);
276 }
277
278 return event;
279}
280
/*
 * Remove an event from the global list and free its per-type registration
 * data. Caller must hold sdei_events_lock.
 */
static void sdei_event_destroy(struct sdei_event *event)
{
	lockdep_assert_held(&sdei_events_lock);

	spin_lock(&sdei_list_lock);
	list_del(&event->list);
	spin_unlock(&sdei_list_lock);

	/* Shared events have one registration block, private ones per-cpu. */
	if (event->type == SDEI_EVENT_TYPE_SHARED)
		kfree(event->registered);
	else
		free_percpu(event->private_registered);

	kfree(event);
}

/* Query the firmware for the SDEI version it implements. */
static int sdei_api_get_version(u64 *version)
{
	return invoke_sdei_fn(SDEI_1_0_FN_SDEI_VERSION, 0, 0, 0, 0, 0, version);
}

/*
 * Mask SDEI event delivery on the calling CPU. Must run with preemption
 * disabled so "local" really is this CPU. Note -EIO is deliberately
 * swallowed (the function returns 0 in that case); other errors are
 * logged once and returned.
 */
int sdei_mask_local_cpu(void)
{
	int err;

	WARN_ON_ONCE(preemptible());

	err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_MASK, 0, 0, 0, 0, 0, NULL);
	if (err && err != -EIO) {
		pr_warn_once("failed to mask CPU[%u]: %d\n",
			     smp_processor_id(), err);
		return err;
	}

	return 0;
}

/* on_each_cpu() helper: mask this CPU. */
static void _ipi_mask_cpu(void *ignored)
{
	sdei_mask_local_cpu();
}

/*
 * Unmask SDEI event delivery on the calling CPU. Mirrors
 * sdei_mask_local_cpu(), including the silent treatment of -EIO.
 */
int sdei_unmask_local_cpu(void)
{
	int err;

	WARN_ON_ONCE(preemptible());

	err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_UNMASK, 0, 0, 0, 0, 0, NULL);
	if (err && err != -EIO) {
		pr_warn_once("failed to unmask CPU[%u]: %d\n",
			     smp_processor_id(), err);
		return err;
	}

	return 0;
}

/* on_each_cpu() helper: unmask this CPU. */
static void _ipi_unmask_cpu(void *ignored)
{
	sdei_unmask_local_cpu();
}

/* on_each_cpu() helper: reset this CPU's private SDEI state in firmware. */
static void _ipi_private_reset(void *ignored)
{
	int err;

	err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PRIVATE_RESET, 0, 0, 0, 0, 0,
			     NULL);
	if (err && err != -EIO)
		pr_warn_once("failed to reset CPU[%u]: %d\n",
			     smp_processor_id(), err);
}

/* Reset all shared-event state in firmware. */
static int sdei_api_shared_reset(void)
{
	return invoke_sdei_fn(SDEI_1_0_FN_SDEI_SHARED_RESET, 0, 0, 0, 0, 0,
			      NULL);
}

/*
 * Give up on SDEI: mask every CPU and clear the firmware-call conduit so
 * that no further calls are attempted.
 */
static void sdei_mark_interface_broken(void)
{
	pr_err("disabling SDEI firmware interface\n");
	on_each_cpu(&_ipi_mask_cpu, NULL, true);
	sdei_firmware_call = NULL;
}

/*
 * Reset private state on every CPU, then shared state. A failure of the
 * shared reset marks the whole interface broken.
 */
static int sdei_platform_reset(void)
{
	int err;

	on_each_cpu(&_ipi_private_reset, NULL, true);
	err = sdei_api_shared_reset();
	if (err) {
		pr_err("Failed to reset platform: %d\n", err);
		sdei_mark_interface_broken();
	}

	return err;
}
381
/* Ask firmware to enable delivery of @event_num. */
static int sdei_api_event_enable(u32 event_num)
{
	return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ENABLE, event_num, 0, 0, 0,
			      0, NULL);
}

/* Called directly by the hotplug callbacks */
static void _local_event_enable(void *data)
{
	int err;
	struct sdei_crosscall_args *arg = data;

	WARN_ON_ONCE(preemptible());

	err = sdei_api_event_enable(arg->event->event_num);

	/* Report this CPU's result back through the cross-call args. */
	sdei_cross_call_return(arg, err);
}

/*
 * Enable a registered event. Shared events need a single firmware call;
 * private events are enabled on each CPU via a cross call. The reenable
 * flag is recorded first so hotplug/resume paths can redo the enable.
 * Returns -ENOENT if the event is not registered.
 */
int sdei_event_enable(u32 event_num)
{
	int err = -EINVAL;
	struct sdei_event *event;

	mutex_lock(&sdei_events_lock);
	event = sdei_event_find(event_num);
	if (!event) {
		mutex_unlock(&sdei_events_lock);
		return -ENOENT;
	}

	spin_lock(&sdei_list_lock);
	event->reenable = true;
	spin_unlock(&sdei_list_lock);

	if (event->type == SDEI_EVENT_TYPE_SHARED)
		err = sdei_api_event_enable(event->event_num);
	else
		err = sdei_do_cross_call(_local_event_enable, event);
	mutex_unlock(&sdei_events_lock);

	return err;
}
EXPORT_SYMBOL(sdei_event_enable);
426
/* Ask firmware to disable delivery of @event_num. */
static int sdei_api_event_disable(u32 event_num)
{
	return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_DISABLE, event_num, 0, 0,
			      0, 0, NULL);
}

/* Cross-call helper: disable the event on the calling CPU. */
static void _ipi_event_disable(void *data)
{
	int err;
	struct sdei_crosscall_args *arg = data;

	err = sdei_api_event_disable(arg->event->event_num);

	sdei_cross_call_return(arg, err);
}

/*
 * Disable a registered event; the mirror of sdei_event_enable(). Clears
 * the reenable flag first so hotplug/resume paths won't re-enable it.
 * Returns -ENOENT if the event is not registered.
 */
int sdei_event_disable(u32 event_num)
{
	int err = -EINVAL;
	struct sdei_event *event;

	mutex_lock(&sdei_events_lock);
	event = sdei_event_find(event_num);
	if (!event) {
		mutex_unlock(&sdei_events_lock);
		return -ENOENT;
	}

	spin_lock(&sdei_list_lock);
	event->reenable = false;
	spin_unlock(&sdei_list_lock);

	if (event->type == SDEI_EVENT_TYPE_SHARED)
		err = sdei_api_event_disable(event->event_num);
	else
		err = sdei_do_cross_call(_ipi_event_disable, event);
	mutex_unlock(&sdei_events_lock);

	return err;
}
EXPORT_SYMBOL(sdei_event_disable);
468
/* Ask firmware to unregister @event_num. */
static int sdei_api_event_unregister(u32 event_num)
{
	return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_UNREGISTER, event_num, 0,
			      0, 0, 0, NULL);
}

/* Called directly by the hotplug callbacks */
static void _local_event_unregister(void *data)
{
	int err;
	struct sdei_crosscall_args *arg = data;

	WARN_ON_ONCE(preemptible());

	err = sdei_api_event_unregister(arg->event->event_num);

	sdei_cross_call_return(arg, err);
}

/*
 * Unregister @event with firmware but keep the kernel bookkeeping.
 * Clears reregister/reenable first so a concurrent hotplug/resume does
 * not re-create the registration. Caller must hold sdei_events_lock.
 */
static int _sdei_event_unregister(struct sdei_event *event)
{
	lockdep_assert_held(&sdei_events_lock);

	spin_lock(&sdei_list_lock);
	event->reregister = false;
	event->reenable = false;
	spin_unlock(&sdei_list_lock);

	if (event->type == SDEI_EVENT_TYPE_SHARED)
		return sdei_api_event_unregister(event->event_num);

	return sdei_do_cross_call(_local_event_unregister, event);
}

/*
 * Public API: unregister and destroy an event. Takes sdei_events_lock,
 * so it must not be called from NMI context.
 */
int sdei_event_unregister(u32 event_num)
{
	int err;
	struct sdei_event *event;

	WARN_ON(in_nmi());

	mutex_lock(&sdei_events_lock);
	event = sdei_event_find(event_num);
	do {
		if (!event) {
			pr_warn("Event %u not registered\n", event_num);
			err = -ENOENT;
			break;
		}

		err = _sdei_event_unregister(event);
		if (err)
			break;

		sdei_event_destroy(event);
	} while (0);
	mutex_unlock(&sdei_events_lock);

	return err;
}
EXPORT_SYMBOL(sdei_event_unregister);
530
/*
 * unregister events, but don't destroy them as they are re-registered by
 * sdei_reregister_shared().
 */
static int sdei_unregister_shared(void)
{
	int err = 0;
	struct sdei_event *event;

	mutex_lock(&sdei_events_lock);
	spin_lock(&sdei_list_lock);
	list_for_each_entry(event, &sdei_list, list) {
		if (event->type != SDEI_EVENT_TYPE_SHARED)
			continue;

		/* Stop at the first failure; the caller treats it as fatal. */
		err = _sdei_event_unregister(event);
		if (err)
			break;
	}
	spin_unlock(&sdei_list_lock);
	mutex_unlock(&sdei_events_lock);

	return err;
}

/*
 * Register @event_num with firmware. @entry_point is the address firmware
 * enters on the event; @arg is handed back to that entry point.
 * @flags/@affinity select the routing mode for shared events.
 */
static int sdei_api_event_register(u32 event_num, unsigned long entry_point,
				   void *arg, u64 flags, u64 affinity)
{
	return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_REGISTER, event_num,
			      (unsigned long)entry_point, (unsigned long)arg,
			      flags, affinity, NULL);
}
563
/* Called directly by the hotplug callbacks */
static void _local_event_register(void *data)
{
	int err;
	struct sdei_registered_event *reg;
	struct sdei_crosscall_args *arg = data;

	WARN_ON(preemptible());

	/* Each CPU registers with its own per-cpu argument block. */
	reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
	err = sdei_api_event_register(arg->event->event_num, sdei_entry_point,
				      reg, 0, 0);

	sdei_cross_call_return(arg, err);
}

/*
 * Register @event with firmware. Shared events are routed to any CPU
 * (RM_ANY); private events are registered per CPU via a cross call.
 * If a private registration partially fails, the CPUs that succeeded are
 * rolled back with _local_event_unregister(). Caller must hold
 * sdei_events_lock.
 */
static int _sdei_event_register(struct sdei_event *event)
{
	int err;

	lockdep_assert_held(&sdei_events_lock);

	spin_lock(&sdei_list_lock);
	event->reregister = true;
	spin_unlock(&sdei_list_lock);

	if (event->type == SDEI_EVENT_TYPE_SHARED)
		return sdei_api_event_register(event->event_num,
					       sdei_entry_point,
					       event->registered,
					       SDEI_EVENT_REGISTER_RM_ANY, 0);

	err = sdei_do_cross_call(_local_event_register, event);
	if (err) {
		spin_lock(&sdei_list_lock);
		event->reregister = false;
		event->reenable = false;
		spin_unlock(&sdei_list_lock);

		sdei_do_cross_call(_local_event_unregister, event);
	}

	return err;
}

/*
 * Public API: create and register an event with @cb as its handler and
 * @arg as the handler's cookie. Returns -EBUSY if the event number is
 * already in use. Must not be called from NMI context.
 */
int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
{
	int err;
	struct sdei_event *event;

	WARN_ON(in_nmi());

	mutex_lock(&sdei_events_lock);
	do {
		if (sdei_event_find(event_num)) {
			pr_warn("Event %u already registered\n", event_num);
			err = -EBUSY;
			break;
		}

		event = sdei_event_create(event_num, cb, arg);
		if (IS_ERR(event)) {
			err = PTR_ERR(event);
			pr_warn("Failed to create event %u: %d\n", event_num,
				err);
			break;
		}

		err = _sdei_event_register(event);
		if (err) {
			sdei_event_destroy(event);
			pr_warn("Failed to register event %u: %d\n", event_num,
				err);
		}
	} while (0);
	mutex_unlock(&sdei_events_lock);

	return err;
}
EXPORT_SYMBOL(sdei_event_register);
645
/*
 * Re-register an event whose firmware state was lost, and re-enable it
 * if its reenable flag is set. On failure the event is destroyed.
 * Caller must hold sdei_events_lock.
 */
static int sdei_reregister_event(struct sdei_event *event)
{
	int err;

	lockdep_assert_held(&sdei_events_lock);

	err = _sdei_event_register(event);
	if (err) {
		pr_err("Failed to re-register event %u\n", event->event_num);
		sdei_event_destroy(event);
		return err;
	}

	if (event->reenable) {
		if (event->type == SDEI_EVENT_TYPE_SHARED)
			err = sdei_api_event_enable(event->event_num);
		else
			err = sdei_do_cross_call(_local_event_enable, event);
	}

	if (err)
		pr_err("Failed to re-enable event %u\n", event->event_num);

	return err;
}

/* Walk the event list, re-registering every shared event marked reregister. */
static int sdei_reregister_shared(void)
{
	int err = 0;
	struct sdei_event *event;

	mutex_lock(&sdei_events_lock);
	spin_lock(&sdei_list_lock);
	list_for_each_entry(event, &sdei_list, list) {
		if (event->type != SDEI_EVENT_TYPE_SHARED)
			continue;

		if (event->reregister) {
			err = sdei_reregister_event(event);
			if (err)
				break;
		}
	}
	spin_unlock(&sdei_list_lock);
	mutex_unlock(&sdei_events_lock);

	return err;
}
694
/*
 * CPU hotplug teardown: unregister this CPU's private events with
 * firmware, then mask SDEI delivery on this CPU.
 */
static int sdei_cpuhp_down(unsigned int cpu)
{
	struct sdei_event *event;
	struct sdei_crosscall_args arg;

	/* un-register private events */
	spin_lock(&sdei_list_lock);
	list_for_each_entry(event, &sdei_list, list) {
		if (event->type == SDEI_EVENT_TYPE_SHARED)
			continue;

		CROSSCALL_INIT(arg, event);
		/* call the cross-call function locally... */
		_local_event_unregister(&arg);
		if (arg.first_error)
			pr_err("Failed to unregister event %u: %d\n",
			       event->event_num, arg.first_error);
	}
	spin_unlock(&sdei_list_lock);

	return sdei_mask_local_cpu();
}

/*
 * CPU hotplug bring-up: re-register and (if flagged) re-enable this
 * CPU's private events, then unmask SDEI delivery on this CPU.
 */
static int sdei_cpuhp_up(unsigned int cpu)
{
	struct sdei_event *event;
	struct sdei_crosscall_args arg;

	/* re-register/enable private events */
	spin_lock(&sdei_list_lock);
	list_for_each_entry(event, &sdei_list, list) {
		if (event->type == SDEI_EVENT_TYPE_SHARED)
			continue;

		if (event->reregister) {
			CROSSCALL_INIT(arg, event);
			/* call the cross-call function locally... */
			_local_event_register(&arg);
			if (arg.first_error)
				pr_err("Failed to re-register event %u: %d\n",
				       event->event_num, arg.first_error);
		}

		if (event->reenable) {
			CROSSCALL_INIT(arg, event);
			_local_event_enable(&arg);
			if (arg.first_error)
				pr_err("Failed to re-enable event %u: %d\n",
				       event->event_num, arg.first_error);
		}
	}
	spin_unlock(&sdei_list_lock);

	return sdei_unmask_local_cpu();
}
750
/* When entering idle, mask/unmask events for this cpu */
static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action,
			    void *data)
{
	int rv;

	switch (action) {
	case CPU_PM_ENTER:
		/* Don't take events while this CPU is in a low-power state. */
		rv = sdei_mask_local_cpu();
		break;
	case CPU_PM_EXIT:
	case CPU_PM_ENTER_FAILED:
		rv = sdei_unmask_local_cpu();
		break;
	default:
		return NOTIFY_DONE;
	}

	if (rv)
		return notifier_from_errno(rv);

	return NOTIFY_OK;
}

static struct notifier_block sdei_pm_nb = {
	.notifier_call = sdei_pm_notifier,
};
778
779static int sdei_device_suspend(struct device *dev)
780{
781 on_each_cpu(_ipi_mask_cpu, NULL, true);
782
783 return 0;
784}
785
786static int sdei_device_resume(struct device *dev)
787{
788 on_each_cpu(_ipi_unmask_cpu, NULL, true);
789
790 return 0;
791}
792
793/*
794 * We need all events to be reregistered when we resume from hibernate.
795 *
796 * The sequence is freeze->thaw. Reboot. freeze->restore. We unregister
797 * events during freeze, then re-register and re-enable them during thaw
798 * and restore.
799 */
800static int sdei_device_freeze(struct device *dev)
801{
802 int err;
803
804 /* unregister private events */
805 cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
806
807 err = sdei_unregister_shared();
808 if (err)
809 return err;
810
811 return 0;
812}
813
/*
 * Thaw after hibernate: re-register the shared events, then restore the
 * hotplug state so each CPU re-registers its private events as it comes
 * up. A failure to re-register shared events disables the interface.
 */
static int sdei_device_thaw(struct device *dev)
{
	int err;

	/* re-register shared events */
	err = sdei_reregister_shared();
	if (err) {
		pr_warn("Failed to re-register shared events...\n");
		sdei_mark_interface_broken();
		return err;
	}

	err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
				&sdei_cpuhp_up, &sdei_cpuhp_down);
	if (err)
		pr_warn("Failed to re-register CPU hotplug notifier...\n");

	return err;
}

/*
 * Restore after hibernate: the platform was rebooted in between, so
 * reset all SDEI firmware state before re-registering as in thaw.
 */
static int sdei_device_restore(struct device *dev)
{
	int err;

	err = sdei_platform_reset();
	if (err)
		return err;

	return sdei_device_thaw(dev);
}
844
/* PM callbacks: mask/unmask over suspend, full re-register over hibernate. */
static const struct dev_pm_ops sdei_pm_ops = {
	.suspend = sdei_device_suspend,
	.resume = sdei_device_resume,
	.freeze = sdei_device_freeze,
	.thaw = sdei_device_thaw,
	.restore = sdei_device_restore,
};
852
/*
 * Mask all CPUs and unregister all events on panic, reboot or kexec.
 */
static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action,
				void *data)
{
	/*
	 * We are going to reset the interface, after this there is no point
	 * doing work when we take CPUs offline.
	 */
	cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);

	sdei_platform_reset();

	return NOTIFY_OK;
}

static struct notifier_block sdei_reboot_nb = {
	.notifier_call = sdei_reboot_notifier,
};
873
/* SMC conduit: forward an SDEI call to firmware via an SMC instruction. */
static void sdei_smccc_smc(unsigned long function_id,
			   unsigned long arg0, unsigned long arg1,
			   unsigned long arg2, unsigned long arg3,
			   unsigned long arg4, struct arm_smccc_res *res)
{
	arm_smccc_smc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res);
}

/* HVC conduit: forward an SDEI call via a hypervisor call. */
static void sdei_smccc_hvc(unsigned long function_id,
			   unsigned long arg0, unsigned long arg1,
			   unsigned long arg2, unsigned long arg3,
			   unsigned long arg4, struct arm_smccc_res *res)
{
	arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res);
}
889
/*
 * Select the conduit (SMC or HVC) used to reach the SDEI implementation,
 * from the DT "method" property or, on ACPI systems, the PSCI choice.
 * Sets sdei_firmware_call as a side effect; leaves it NULL on failure.
 */
static int sdei_get_conduit(struct platform_device *pdev)
{
	const char *method;
	struct device_node *np = pdev->dev.of_node;

	sdei_firmware_call = NULL;
	if (np) {
		if (of_property_read_string(np, "method", &method)) {
			pr_warn("missing \"method\" property\n");
			return CONDUIT_INVALID;
		}

		if (!strcmp("hvc", method)) {
			sdei_firmware_call = &sdei_smccc_hvc;
			return CONDUIT_HVC;
		} else if (!strcmp("smc", method)) {
			sdei_firmware_call = &sdei_smccc_smc;
			return CONDUIT_SMC;
		}

		pr_warn("invalid \"method\" property: %s\n", method);
	} else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) {
		/* With ACPI, SDEI uses the same conduit PSCI uses. */
		if (acpi_psci_use_hvc()) {
			sdei_firmware_call = &sdei_smccc_hvc;
			return CONDUIT_HVC;
		} else {
			sdei_firmware_call = &sdei_smccc_smc;
			return CONDUIT_SMC;
		}
	}

	return CONDUIT_INVALID;
}
923
/*
 * Probe: pick a conduit, check the firmware implements SDEI v1.x, reset
 * all firmware state, then install the PM, reboot and hotplug hooks.
 * Some "SDEI unusable" cases return 0 (with the interface marked broken)
 * rather than an error, keeping the device bound.
 */
static int sdei_probe(struct platform_device *pdev)
{
	int err;
	u64 ver = 0;
	int conduit;

	conduit = sdei_get_conduit(pdev);
	if (!sdei_firmware_call)
		return 0;

	err = sdei_api_get_version(&ver);
	if (err == -EOPNOTSUPP)
		pr_err("advertised but not implemented in platform firmware\n");
	if (err) {
		pr_err("Failed to get SDEI version: %d\n", err);
		sdei_mark_interface_broken();
		return err;
	}

	pr_info("SDEIv%d.%d (0x%x) detected in firmware.\n",
		(int)SDEI_VERSION_MAJOR(ver), (int)SDEI_VERSION_MINOR(ver),
		(int)SDEI_VERSION_VENDOR(ver));

	/* This driver only understands major version 1. */
	if (SDEI_VERSION_MAJOR(ver) != 1) {
		pr_warn("Conflicting SDEI version detected.\n");
		sdei_mark_interface_broken();
		return -EINVAL;
	}

	err = sdei_platform_reset();
	if (err)
		return err;

	sdei_entry_point = sdei_arch_get_entry_point(conduit);
	if (!sdei_entry_point) {
		/* Not supported due to hardware or boot configuration */
		sdei_mark_interface_broken();
		return 0;
	}

	err = cpu_pm_register_notifier(&sdei_pm_nb);
	if (err) {
		pr_warn("Failed to register CPU PM notifier...\n");
		goto error;
	}

	err = register_reboot_notifier(&sdei_reboot_nb);
	if (err) {
		pr_warn("Failed to register reboot notifier...\n");
		goto remove_cpupm;
	}

	/* Also unmasks each online CPU via sdei_cpuhp_up(). */
	err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
				&sdei_cpuhp_up, &sdei_cpuhp_down);
	if (err) {
		pr_warn("Failed to register CPU hotplug notifier...\n");
		goto remove_reboot;
	}

	return 0;

remove_reboot:
	unregister_reboot_notifier(&sdei_reboot_nb);

remove_cpupm:
	cpu_pm_unregister_notifier(&sdei_pm_nb);

error:
	sdei_mark_interface_broken();
	return err;
}
995
/* Matched against the DT node located by sdei_present_dt(). */
static const struct of_device_id sdei_of_match[] = {
	{ .compatible = "arm,sdei-1.0" },
	{}
};

static struct platform_driver sdei_driver = {
	.driver = {
		.name = "sdei",
		.pm = &sdei_pm_ops,
		.of_match_table = sdei_of_match,
	},
	.probe = sdei_probe,
};
1009
/*
 * Look for an SDEI node under /firmware in the device tree and create
 * its platform device. Returns true if a device was created.
 */
static bool __init sdei_present_dt(void)
{
	struct platform_device *pdev;
	struct device_node *np, *fw_np;

	fw_np = of_find_node_by_name(NULL, "firmware");
	if (!fw_np)
		return false;

	np = of_find_matching_node(fw_np, sdei_of_match);
	of_node_put(fw_np);
	if (!np)
		return false;

	pdev = of_platform_device_create(np, sdei_driver.driver.name, NULL);
	of_node_put(np);
	if (!pdev)
		return false;

	return true;
}
1031
/*
 * Check for an ACPI SDEI table and, if present, register a platform
 * device for the driver. Returns true if the device was created.
 */
static bool __init sdei_present_acpi(void)
{
	acpi_status status;
	struct platform_device *pdev;
	struct acpi_table_header *sdei_table_header;

	if (acpi_disabled)
		return false;

	status = acpi_get_table(ACPI_SIG_SDEI, 0, &sdei_table_header);
	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
		const char *msg = acpi_format_exception(status);

		/* Unexpected failures are logged; AE_NOT_FOUND is normal. */
		pr_info("Failed to get ACPI:SDEI table, %s\n", msg);
	}
	if (ACPI_FAILURE(status))
		return false;

	pdev = platform_device_register_simple(sdei_driver.driver.name, 0, NULL,
					       0);
	if (IS_ERR(pdev))
		return false;

	return true;
}
1057
1058static int __init sdei_init(void)
1059{
1060 if (sdei_present_dt() || sdei_present_acpi())
1061 platform_driver_register(&sdei_driver);
1062
1063 return 0;
1064}
1065
1066/*
1067 * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
1068 * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
1069 * by device_initcall(). We want to be called in the middle.
1070 */
1071subsys_initcall_sync(sdei_init);
1072
/*
 * Entry from the arch SDEI asm glue: run the registered callback for
 * this event and return its result. The addr_limit is forced to USER_DS
 * around the callback — NOTE(review): presumably so the handler does not
 * inherit an elevated addr_limit from the interrupted context; confirm.
 * Not kprobe-able as it runs from the SDEI event context.
 */
int sdei_event_handler(struct pt_regs *regs,
		       struct sdei_registered_event *arg)
{
	int err;
	mm_segment_t orig_addr_limit;
	u32 event_num = arg->event_num;

	orig_addr_limit = get_fs();
	set_fs(USER_DS);

	err = arg->callback(event_num, regs, arg->callback_arg);
	if (err)
		pr_err_ratelimited("event %u on CPU %u failed with error: %d\n",
				   event_num, smp_processor_id(), err);

	set_fs(orig_addr_limit);

	return err;
}
NOKPROBE_SYMBOL(sdei_event_handler);
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index d687ca3d5049..8b25d31e8401 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -496,6 +496,8 @@ static void __init psci_init_migrate(void)
496static void __init psci_0_2_set_functions(void) 496static void __init psci_0_2_set_functions(void)
497{ 497{
498 pr_info("Using standard PSCI v0.2 function IDs\n"); 498 pr_info("Using standard PSCI v0.2 function IDs\n");
499 psci_ops.get_version = psci_get_version;
500
499 psci_function_id[PSCI_FN_CPU_SUSPEND] = 501 psci_function_id[PSCI_FN_CPU_SUSPEND] =
500 PSCI_FN_NATIVE(0_2, CPU_SUSPEND); 502 PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
501 psci_ops.cpu_suspend = psci_cpu_suspend; 503 psci_ops.cpu_suspend = psci_cpu_suspend;
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index a18794128bf8..7c375443ede6 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -104,26 +104,17 @@ static int of_coresight_alloc_memory(struct device *dev,
104int of_coresight_get_cpu(const struct device_node *node) 104int of_coresight_get_cpu(const struct device_node *node)
105{ 105{
106 int cpu; 106 int cpu;
107 bool found; 107 struct device_node *dn;
108 struct device_node *dn, *np;
109 108
110 dn = of_parse_phandle(node, "cpu", 0); 109 dn = of_parse_phandle(node, "cpu", 0);
111
112 /* Affinity defaults to CPU0 */ 110 /* Affinity defaults to CPU0 */
113 if (!dn) 111 if (!dn)
114 return 0; 112 return 0;
115 113 cpu = of_cpu_node_to_id(dn);
116 for_each_possible_cpu(cpu) {
117 np = of_cpu_device_node_get(cpu);
118 found = (dn == np);
119 of_node_put(np);
120 if (found)
121 break;
122 }
123 of_node_put(dn); 114 of_node_put(dn);
124 115
125 /* Affinity to CPU0 if no cpu nodes are found */ 116 /* Affinity to CPU0 if no cpu nodes are found */
126 return found ? cpu : 0; 117 return (cpu < 0) ? 0 : cpu;
127} 118}
128EXPORT_SYMBOL_GPL(of_coresight_get_cpu); 119EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
129 120
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a874777e9b9d..a57c0fbbd34a 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1070,31 +1070,6 @@ static int __init gic_validate_dist_version(void __iomem *dist_base)
1070 return 0; 1070 return 0;
1071} 1071}
1072 1072
1073static int get_cpu_number(struct device_node *dn)
1074{
1075 const __be32 *cell;
1076 u64 hwid;
1077 int cpu;
1078
1079 cell = of_get_property(dn, "reg", NULL);
1080 if (!cell)
1081 return -1;
1082
1083 hwid = of_read_number(cell, of_n_addr_cells(dn));
1084
1085 /*
1086 * Non affinity bits must be set to 0 in the DT
1087 */
1088 if (hwid & ~MPIDR_HWID_BITMASK)
1089 return -1;
1090
1091 for_each_possible_cpu(cpu)
1092 if (cpu_logical_map(cpu) == hwid)
1093 return cpu;
1094
1095 return -1;
1096}
1097
1098/* Create all possible partitions at boot time */ 1073/* Create all possible partitions at boot time */
1099static void __init gic_populate_ppi_partitions(struct device_node *gic_node) 1074static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
1100{ 1075{
@@ -1145,8 +1120,8 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
1145 if (WARN_ON(!cpu_node)) 1120 if (WARN_ON(!cpu_node))
1146 continue; 1121 continue;
1147 1122
1148 cpu = get_cpu_number(cpu_node); 1123 cpu = of_cpu_node_to_id(cpu_node);
1149 if (WARN_ON(cpu == -1)) 1124 if (WARN_ON(cpu < 0))
1150 continue; 1125 continue;
1151 1126
1152 pr_cont("%pOF[%d] ", cpu_node, cpu); 1127 pr_cont("%pOF[%d] ", cpu_node, cpu);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 26618ba8f92a..a9d6fe86585b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -316,6 +316,32 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
316EXPORT_SYMBOL(of_get_cpu_node); 316EXPORT_SYMBOL(of_get_cpu_node);
317 317
318/** 318/**
319 * of_cpu_node_to_id: Get the logical CPU number for a given device_node
320 *
321 * @cpu_node: Pointer to the device_node for CPU.
322 *
323 * Returns the logical CPU number of the given CPU device_node.
324 * Returns -ENODEV if the CPU is not found.
325 */
326int of_cpu_node_to_id(struct device_node *cpu_node)
327{
328 int cpu;
329 bool found = false;
330 struct device_node *np;
331
332 for_each_possible_cpu(cpu) {
333 np = of_cpu_device_node_get(cpu);
334 found = (cpu_node == np);
335 of_node_put(np);
336 if (found)
337 return cpu;
338 }
339
340 return -ENODEV;
341}
342EXPORT_SYMBOL(of_cpu_node_to_id);
343
344/**
319 * __of_device_is_compatible() - Check if the node matches given constraints 345 * __of_device_is_compatible() - Check if the node matches given constraints
320 * @device: pointer to node 346 * @device: pointer to node
321 * @compat: required compatible string, NULL or "" for any match 347 * @compat: required compatible string, NULL or "" for any match
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index b8f44b068fc6..da5724cd89cf 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -17,6 +17,15 @@ config ARM_PMU_ACPI
17 depends on ARM_PMU && ACPI 17 depends on ARM_PMU && ACPI
18 def_bool y 18 def_bool y
19 19
20config ARM_DSU_PMU
21 tristate "ARM DynamIQ Shared Unit (DSU) PMU"
22 depends on ARM64
23 help
24 Provides support for performance monitor unit in ARM DynamIQ Shared
25 Unit (DSU). The DSU integrates one or more cores with an L3 memory
26 system, control logic. The PMU allows counting various events related
27 to DSU.
28
20config HISI_PMU 29config HISI_PMU
21 bool "HiSilicon SoC PMU" 30 bool "HiSilicon SoC PMU"
22 depends on ARM64 && ACPI 31 depends on ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 710a0135bd61..c2f27419bdf0 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,4 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
2obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o 3obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
3obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o 4obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
4obj-$(CONFIG_HISI_PMU) += hisilicon/ 5obj-$(CONFIG_HISI_PMU) += hisilicon/
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
new file mode 100644
index 000000000000..93c50e377507
--- /dev/null
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -0,0 +1,843 @@
1/*
2 * ARM DynamIQ Shared Unit (DSU) PMU driver
3 *
4 * Copyright (C) ARM Limited, 2017.
5 *
6 * Based on ARM CCI-PMU, ARMv8 PMU-v3 drivers.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * version 2 as published by the Free Software Foundation.
11 */
12
13#define PMUNAME "arm_dsu"
14#define DRVNAME PMUNAME "_pmu"
15#define pr_fmt(fmt) DRVNAME ": " fmt
16
17#include <linux/bitmap.h>
18#include <linux/bitops.h>
19#include <linux/bug.h>
20#include <linux/cpumask.h>
21#include <linux/device.h>
22#include <linux/interrupt.h>
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <linux/of_device.h>
26#include <linux/perf_event.h>
27#include <linux/platform_device.h>
28#include <linux/spinlock.h>
29#include <linux/smp.h>
30#include <linux/sysfs.h>
31#include <linux/types.h>
32
33#include <asm/arm_dsu_pmu.h>
34#include <asm/local64.h>
35
36/* PMU event codes */
37#define DSU_PMU_EVT_CYCLES 0x11
38#define DSU_PMU_EVT_CHAIN 0x1e
39
40#define DSU_PMU_MAX_COMMON_EVENTS 0x40
41
42#define DSU_PMU_MAX_HW_CNTRS 32
43#define DSU_PMU_HW_COUNTER_MASK (DSU_PMU_MAX_HW_CNTRS - 1)
44
45#define CLUSTERPMCR_E BIT(0)
46#define CLUSTERPMCR_P BIT(1)
47#define CLUSTERPMCR_C BIT(2)
48#define CLUSTERPMCR_N_SHIFT 11
49#define CLUSTERPMCR_N_MASK 0x1f
50#define CLUSTERPMCR_IDCODE_SHIFT 16
51#define CLUSTERPMCR_IDCODE_MASK 0xff
52#define CLUSTERPMCR_IMP_SHIFT 24
53#define CLUSTERPMCR_IMP_MASK 0xff
54#define CLUSTERPMCR_RES_MASK 0x7e8
55#define CLUSTERPMCR_RES_VAL 0x40
56
57#define DSU_ACTIVE_CPU_MASK 0x0
58#define DSU_ASSOCIATED_CPU_MASK 0x1
59
60/*
61 * We use the index of the counters as they appear in the counter
62 * bit maps in the PMU registers (e.g CLUSTERPMSELR).
63 * i.e,
64 * counter 0 - Bit 0
65 * counter 1 - Bit 1
66 * ...
67 * Cycle counter - Bit 31
68 */
69#define DSU_PMU_IDX_CYCLE_COUNTER 31
70
71/* All event counters are 32bit, with a 64bit Cycle counter */
72#define DSU_PMU_COUNTER_WIDTH(idx) \
73 (((idx) == DSU_PMU_IDX_CYCLE_COUNTER) ? 64 : 32)
74
75#define DSU_PMU_COUNTER_MASK(idx) \
76 GENMASK_ULL((DSU_PMU_COUNTER_WIDTH((idx)) - 1), 0)
77
78#define DSU_EXT_ATTR(_name, _func, _config) \
79 (&((struct dev_ext_attribute[]) { \
80 { \
81 .attr = __ATTR(_name, 0444, _func, NULL), \
82 .var = (void *)_config \
83 } \
84 })[0].attr.attr)
85
86#define DSU_EVENT_ATTR(_name, _config) \
87 DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config)
88
89#define DSU_FORMAT_ATTR(_name, _config) \
90 DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config)
91
92#define DSU_CPUMASK_ATTR(_name, _config) \
93 DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config)
94
95struct dsu_hw_events {
96 DECLARE_BITMAP(used_mask, DSU_PMU_MAX_HW_CNTRS);
97 struct perf_event *events[DSU_PMU_MAX_HW_CNTRS];
98};
99
100/*
101 * struct dsu_pmu - DSU PMU descriptor
102 *
103 * @pmu_lock : Protects accesses to DSU PMU register from normal vs
104 * interrupt handler contexts.
105 * @hw_events : Holds the event counter state.
106 * @associated_cpus : CPUs attached to the DSU.
107 * @active_cpu : CPU to which the PMU is bound for accesses.
108 * @cpuhp_node : Node for CPU hotplug notifier link.
109 * @num_counters : Number of event counters implemented by the PMU,
110 * excluding the cycle counter.
111 * @irq : Interrupt line for counter overflow.
112 * @cpmceid_bitmap : Bitmap for the availability of architected common
113 * events (event_code < 0x40).
114 */
115struct dsu_pmu {
116 struct pmu pmu;
117 struct device *dev;
118 raw_spinlock_t pmu_lock;
119 struct dsu_hw_events hw_events;
120 cpumask_t associated_cpus;
121 cpumask_t active_cpu;
122 struct hlist_node cpuhp_node;
123 s8 num_counters;
124 int irq;
125 DECLARE_BITMAP(cpmceid_bitmap, DSU_PMU_MAX_COMMON_EVENTS);
126};
127
/* cpuhp state for this driver; registered elsewhere in this file. */
static unsigned long dsu_pmu_cpuhp_state;

static inline struct dsu_pmu *to_dsu_pmu(struct pmu *pmu)
{
	return container_of(pmu, struct dsu_pmu, pmu);
}

/* sysfs: print an event's encoding, e.g. "event=0x2b". */
static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct dev_ext_attribute *eattr = container_of(attr,
					struct dev_ext_attribute, attr);
	return snprintf(buf, PAGE_SIZE, "event=0x%lx\n",
			(unsigned long)eattr->var);
}

/* sysfs: print a config-format string, e.g. "config:0-31". */
static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct dev_ext_attribute *eattr = container_of(attr,
					struct dev_ext_attribute, attr);
	return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
}

/* sysfs: print the active or associated cpumask, selected by eattr->var. */
static ssize_t dsu_pmu_cpumask_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);
	struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
	struct dev_ext_attribute *eattr = container_of(attr,
					struct dev_ext_attribute, attr);
	unsigned long mask_id = (unsigned long)eattr->var;
	const cpumask_t *cpumask;

	switch (mask_id) {
	case DSU_ACTIVE_CPU_MASK:
		cpumask = &dsu_pmu->active_cpu;
		break;
	case DSU_ASSOCIATED_CPU_MASK:
		cpumask = &dsu_pmu->associated_cpus;
		break;
	default:
		return 0;
	}
	return cpumap_print_to_pagebuf(true, buf, cpumask);
}
177
static struct attribute *dsu_pmu_format_attrs[] = {
	DSU_FORMAT_ATTR(event, "config:0-31"),
	NULL,
};

static const struct attribute_group dsu_pmu_format_attr_group = {
	.name = "format",
	.attrs = dsu_pmu_format_attrs,
};

/* Common events; each is only exposed if the hardware advertises it. */
static struct attribute *dsu_pmu_event_attrs[] = {
	DSU_EVENT_ATTR(cycles, 0x11),
	DSU_EVENT_ATTR(bus_access, 0x19),
	DSU_EVENT_ATTR(memory_error, 0x1a),
	DSU_EVENT_ATTR(bus_cycles, 0x1d),
	DSU_EVENT_ATTR(l3d_cache_allocate, 0x29),
	DSU_EVENT_ATTR(l3d_cache_refill, 0x2a),
	DSU_EVENT_ATTR(l3d_cache, 0x2b),
	DSU_EVENT_ATTR(l3d_cache_wb, 0x2c),
	NULL,
};

/* Show an event attribute only if its bit is set in the CPMCEID bitmap. */
static umode_t
dsu_pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr,
			      int unused)
{
	struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
	struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
	struct dev_ext_attribute *eattr = container_of(attr,
					struct dev_ext_attribute, attr.attr);
	unsigned long evt = (unsigned long)eattr->var;

	return test_bit(evt, dsu_pmu->cpmceid_bitmap) ? attr->mode : 0;
}

static const struct attribute_group dsu_pmu_events_attr_group = {
	.name = "events",
	.attrs = dsu_pmu_event_attrs,
	.is_visible = dsu_pmu_event_attr_is_visible,
};

static struct attribute *dsu_pmu_cpumask_attrs[] = {
	DSU_CPUMASK_ATTR(cpumask, DSU_ACTIVE_CPU_MASK),
	DSU_CPUMASK_ATTR(associated_cpus, DSU_ASSOCIATED_CPU_MASK),
	NULL,
};

static const struct attribute_group dsu_pmu_cpumask_attr_group = {
	.attrs = dsu_pmu_cpumask_attrs,
};

static const struct attribute_group *dsu_pmu_attr_groups[] = {
	&dsu_pmu_cpumask_attr_group,
	&dsu_pmu_events_attr_group,
	&dsu_pmu_format_attr_group,
	NULL,
};
235
/* Pick any online CPU associated with this DSU other than @cpu. */
static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
{
	struct cpumask online_supported;

	cpumask_and(&online_supported,
			 &dsu_pmu->associated_cpus, cpu_online_mask);
	return cpumask_any_but(&online_supported, cpu);
}

/*
 * A counter index is valid if it is an implemented event counter or the
 * dedicated cycle counter slot.
 */
static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
{
	return (idx < dsu_pmu->num_counters) ||
	       (idx == DSU_PMU_IDX_CYCLE_COUNTER);
}
250
251static inline u64 dsu_pmu_read_counter(struct perf_event *event)
252{
253 u64 val;
254 unsigned long flags;
255 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
256 int idx = event->hw.idx;
257
258 if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
259 &dsu_pmu->associated_cpus)))
260 return 0;
261
262 if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
263 dev_err(event->pmu->dev,
264 "Trying reading invalid counter %d\n", idx);
265 return 0;
266 }
267
268 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
269 if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
270 val = __dsu_pmu_read_pmccntr();
271 else
272 val = __dsu_pmu_read_counter(idx);
273 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
274
275 return val;
276}
277
278static void dsu_pmu_write_counter(struct perf_event *event, u64 val)
279{
280 unsigned long flags;
281 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
282 int idx = event->hw.idx;
283
284 if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
285 &dsu_pmu->associated_cpus)))
286 return;
287
288 if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
289 dev_err(event->pmu->dev,
290 "writing to invalid counter %d\n", idx);
291 return;
292 }
293
294 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
295 if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
296 __dsu_pmu_write_pmccntr(val);
297 else
298 __dsu_pmu_write_counter(idx, val);
299 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
300}
301
302static int dsu_pmu_get_event_idx(struct dsu_hw_events *hw_events,
303 struct perf_event *event)
304{
305 int idx;
306 unsigned long evtype = event->attr.config;
307 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
308 unsigned long *used_mask = hw_events->used_mask;
309
310 if (evtype == DSU_PMU_EVT_CYCLES) {
311 if (test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask))
312 return -EAGAIN;
313 return DSU_PMU_IDX_CYCLE_COUNTER;
314 }
315
316 idx = find_first_zero_bit(used_mask, dsu_pmu->num_counters);
317 if (idx >= dsu_pmu->num_counters)
318 return -EAGAIN;
319 set_bit(idx, hw_events->used_mask);
320 return idx;
321}
322
323static void dsu_pmu_enable_counter(struct dsu_pmu *dsu_pmu, int idx)
324{
325 __dsu_pmu_counter_interrupt_enable(idx);
326 __dsu_pmu_enable_counter(idx);
327}
328
329static void dsu_pmu_disable_counter(struct dsu_pmu *dsu_pmu, int idx)
330{
331 __dsu_pmu_disable_counter(idx);
332 __dsu_pmu_counter_interrupt_disable(idx);
333}
334
335static inline void dsu_pmu_set_event(struct dsu_pmu *dsu_pmu,
336 struct perf_event *event)
337{
338 int idx = event->hw.idx;
339 unsigned long flags;
340
341 if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
342 dev_err(event->pmu->dev,
343 "Trying to set invalid counter %d\n", idx);
344 return;
345 }
346
347 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
348 __dsu_pmu_set_event(idx, event->hw.config_base);
349 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
350}
351
352static void dsu_pmu_event_update(struct perf_event *event)
353{
354 struct hw_perf_event *hwc = &event->hw;
355 u64 delta, prev_count, new_count;
356
357 do {
358 /* We may also be called from the irq handler */
359 prev_count = local64_read(&hwc->prev_count);
360 new_count = dsu_pmu_read_counter(event);
361 } while (local64_cmpxchg(&hwc->prev_count, prev_count, new_count) !=
362 prev_count);
363 delta = (new_count - prev_count) & DSU_PMU_COUNTER_MASK(hwc->idx);
364 local64_add(delta, &event->count);
365}
366
367static void dsu_pmu_read(struct perf_event *event)
368{
369 dsu_pmu_event_update(event);
370}
371
372static inline u32 dsu_pmu_get_reset_overflow(void)
373{
374 return __dsu_pmu_get_reset_overflow();
375}
376
377/**
378 * dsu_pmu_set_event_period: Set the period for the counter.
379 *
380 * All DSU PMU event counters, except the cycle counter are 32bit
381 * counters. To handle cases of extreme interrupt latency, we program
382 * the counter with half of the max count for the counters.
383 */
384static void dsu_pmu_set_event_period(struct perf_event *event)
385{
386 int idx = event->hw.idx;
387 u64 val = DSU_PMU_COUNTER_MASK(idx) >> 1;
388
389 local64_set(&event->hw.prev_count, val);
390 dsu_pmu_write_counter(event, val);
391}
392
393static irqreturn_t dsu_pmu_handle_irq(int irq_num, void *dev)
394{
395 int i;
396 bool handled = false;
397 struct dsu_pmu *dsu_pmu = dev;
398 struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
399 unsigned long overflow;
400
401 overflow = dsu_pmu_get_reset_overflow();
402 if (!overflow)
403 return IRQ_NONE;
404
405 for_each_set_bit(i, &overflow, DSU_PMU_MAX_HW_CNTRS) {
406 struct perf_event *event = hw_events->events[i];
407
408 if (!event)
409 continue;
410 dsu_pmu_event_update(event);
411 dsu_pmu_set_event_period(event);
412 handled = true;
413 }
414
415 return IRQ_RETVAL(handled);
416}
417
418static void dsu_pmu_start(struct perf_event *event, int pmu_flags)
419{
420 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
421
422 /* We always reprogram the counter */
423 if (pmu_flags & PERF_EF_RELOAD)
424 WARN_ON(!(event->hw.state & PERF_HES_UPTODATE));
425 dsu_pmu_set_event_period(event);
426 if (event->hw.idx != DSU_PMU_IDX_CYCLE_COUNTER)
427 dsu_pmu_set_event(dsu_pmu, event);
428 event->hw.state = 0;
429 dsu_pmu_enable_counter(dsu_pmu, event->hw.idx);
430}
431
432static void dsu_pmu_stop(struct perf_event *event, int pmu_flags)
433{
434 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
435
436 if (event->hw.state & PERF_HES_STOPPED)
437 return;
438 dsu_pmu_disable_counter(dsu_pmu, event->hw.idx);
439 dsu_pmu_event_update(event);
440 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
441}
442
443static int dsu_pmu_add(struct perf_event *event, int flags)
444{
445 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
446 struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
447 struct hw_perf_event *hwc = &event->hw;
448 int idx;
449
450 if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
451 &dsu_pmu->associated_cpus)))
452 return -ENOENT;
453
454 idx = dsu_pmu_get_event_idx(hw_events, event);
455 if (idx < 0)
456 return idx;
457
458 hwc->idx = idx;
459 hw_events->events[idx] = event;
460 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
461
462 if (flags & PERF_EF_START)
463 dsu_pmu_start(event, PERF_EF_RELOAD);
464
465 perf_event_update_userpage(event);
466 return 0;
467}
468
469static void dsu_pmu_del(struct perf_event *event, int flags)
470{
471 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
472 struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
473 struct hw_perf_event *hwc = &event->hw;
474 int idx = hwc->idx;
475
476 dsu_pmu_stop(event, PERF_EF_UPDATE);
477 hw_events->events[idx] = NULL;
478 clear_bit(idx, hw_events->used_mask);
479 perf_event_update_userpage(event);
480}
481
482static void dsu_pmu_enable(struct pmu *pmu)
483{
484 u32 pmcr;
485 unsigned long flags;
486 struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
487
488 /* If no counters are added, skip enabling the PMU */
489 if (bitmap_empty(dsu_pmu->hw_events.used_mask, DSU_PMU_MAX_HW_CNTRS))
490 return;
491
492 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
493 pmcr = __dsu_pmu_read_pmcr();
494 pmcr |= CLUSTERPMCR_E;
495 __dsu_pmu_write_pmcr(pmcr);
496 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
497}
498
499static void dsu_pmu_disable(struct pmu *pmu)
500{
501 u32 pmcr;
502 unsigned long flags;
503 struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
504
505 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
506 pmcr = __dsu_pmu_read_pmcr();
507 pmcr &= ~CLUSTERPMCR_E;
508 __dsu_pmu_write_pmcr(pmcr);
509 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
510}
511
512static bool dsu_pmu_validate_event(struct pmu *pmu,
513 struct dsu_hw_events *hw_events,
514 struct perf_event *event)
515{
516 if (is_software_event(event))
517 return true;
518 /* Reject groups spanning multiple HW PMUs. */
519 if (event->pmu != pmu)
520 return false;
521 return dsu_pmu_get_event_idx(hw_events, event) >= 0;
522}
523
524/*
525 * Make sure the group of events can be scheduled at once
526 * on the PMU.
527 */
528static bool dsu_pmu_validate_group(struct perf_event *event)
529{
530 struct perf_event *sibling, *leader = event->group_leader;
531 struct dsu_hw_events fake_hw;
532
533 if (event->group_leader == event)
534 return true;
535
536 memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask));
537 if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader))
538 return false;
539 list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
540 if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling))
541 return false;
542 }
543 return dsu_pmu_validate_event(event->pmu, &fake_hw, event);
544}
545
546static int dsu_pmu_event_init(struct perf_event *event)
547{
548 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
549
550 if (event->attr.type != event->pmu->type)
551 return -ENOENT;
552
553 /* We don't support sampling */
554 if (is_sampling_event(event)) {
555 dev_dbg(dsu_pmu->pmu.dev, "Can't support sampling events\n");
556 return -EOPNOTSUPP;
557 }
558
559 /* We cannot support task bound events */
560 if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
561 dev_dbg(dsu_pmu->pmu.dev, "Can't support per-task counters\n");
562 return -EINVAL;
563 }
564
565 if (has_branch_stack(event) ||
566 event->attr.exclude_user ||
567 event->attr.exclude_kernel ||
568 event->attr.exclude_hv ||
569 event->attr.exclude_idle ||
570 event->attr.exclude_host ||
571 event->attr.exclude_guest) {
572 dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n");
573 return -EINVAL;
574 }
575
576 if (!cpumask_test_cpu(event->cpu, &dsu_pmu->associated_cpus)) {
577 dev_dbg(dsu_pmu->pmu.dev,
578 "Requested cpu is not associated with the DSU\n");
579 return -EINVAL;
580 }
581 /*
582 * Choose the current active CPU to read the events. We don't want
583 * to migrate the event contexts, irq handling etc to the requested
584 * CPU. As long as the requested CPU is within the same DSU, we
585 * are fine.
586 */
587 event->cpu = cpumask_first(&dsu_pmu->active_cpu);
588 if (event->cpu >= nr_cpu_ids)
589 return -EINVAL;
590 if (!dsu_pmu_validate_group(event))
591 return -EINVAL;
592
593 event->hw.config_base = event->attr.config;
594 return 0;
595}
596
597static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
598{
599 struct dsu_pmu *dsu_pmu;
600
601 dsu_pmu = devm_kzalloc(&pdev->dev, sizeof(*dsu_pmu), GFP_KERNEL);
602 if (!dsu_pmu)
603 return ERR_PTR(-ENOMEM);
604
605 raw_spin_lock_init(&dsu_pmu->pmu_lock);
606 /*
607 * Initialise the number of counters to -1, until we probe
608 * the real number on a connected CPU.
609 */
610 dsu_pmu->num_counters = -1;
611 return dsu_pmu;
612}
613
614/**
615 * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster.
616 */
617static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask)
618{
619 int i = 0, n, cpu;
620 struct device_node *cpu_node;
621
622 n = of_count_phandle_with_args(dev, "cpus", NULL);
623 if (n <= 0)
624 return -ENODEV;
625 for (; i < n; i++) {
626 cpu_node = of_parse_phandle(dev, "cpus", i);
627 if (!cpu_node)
628 break;
629 cpu = of_cpu_node_to_id(cpu_node);
630 of_node_put(cpu_node);
631 /*
632 * We have to ignore the failures here and continue scanning
633 * the list to handle cases where the nr_cpus could be capped
634 * in the running kernel.
635 */
636 if (cpu < 0)
637 continue;
638 cpumask_set_cpu(cpu, mask);
639 }
640 return 0;
641}
642
643/*
644 * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster.
645 */
646static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu)
647{
648 u64 num_counters;
649 u32 cpmceid[2];
650
651 num_counters = (__dsu_pmu_read_pmcr() >> CLUSTERPMCR_N_SHIFT) &
652 CLUSTERPMCR_N_MASK;
653 /* We can only support up to 31 independent counters */
654 if (WARN_ON(num_counters > 31))
655 num_counters = 31;
656 dsu_pmu->num_counters = num_counters;
657 if (!dsu_pmu->num_counters)
658 return;
659 cpmceid[0] = __dsu_pmu_read_pmceid(0);
660 cpmceid[1] = __dsu_pmu_read_pmceid(1);
661 bitmap_from_u32array(dsu_pmu->cpmceid_bitmap,
662 DSU_PMU_MAX_COMMON_EVENTS,
663 cpmceid,
664 ARRAY_SIZE(cpmceid));
665}
666
667static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu)
668{
669 cpumask_set_cpu(cpu, &dsu_pmu->active_cpu);
670 if (irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu))
671 pr_warn("Failed to set irq affinity to %d\n", cpu);
672}
673
674/*
675 * dsu_pmu_init_pmu: Initialise the DSU PMU configurations if
676 * we haven't done it already.
677 */
678static void dsu_pmu_init_pmu(struct dsu_pmu *dsu_pmu)
679{
680 if (dsu_pmu->num_counters == -1)
681 dsu_pmu_probe_pmu(dsu_pmu);
682 /* Reset the interrupt overflow mask */
683 dsu_pmu_get_reset_overflow();
684}
685
686static int dsu_pmu_device_probe(struct platform_device *pdev)
687{
688 int irq, rc;
689 struct dsu_pmu *dsu_pmu;
690 char *name;
691 static atomic_t pmu_idx = ATOMIC_INIT(-1);
692
693 dsu_pmu = dsu_pmu_alloc(pdev);
694 if (IS_ERR(dsu_pmu))
695 return PTR_ERR(dsu_pmu);
696
697 rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus);
698 if (rc) {
699 dev_warn(&pdev->dev, "Failed to parse the CPUs\n");
700 return rc;
701 }
702
703 irq = platform_get_irq(pdev, 0);
704 if (irq < 0) {
705 dev_warn(&pdev->dev, "Failed to find IRQ\n");
706 return -EINVAL;
707 }
708
709 name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d",
710 PMUNAME, atomic_inc_return(&pmu_idx));
711 if (!name)
712 return -ENOMEM;
713 rc = devm_request_irq(&pdev->dev, irq, dsu_pmu_handle_irq,
714 IRQF_NOBALANCING, name, dsu_pmu);
715 if (rc) {
716 dev_warn(&pdev->dev, "Failed to request IRQ %d\n", irq);
717 return rc;
718 }
719
720 dsu_pmu->irq = irq;
721 platform_set_drvdata(pdev, dsu_pmu);
722 rc = cpuhp_state_add_instance(dsu_pmu_cpuhp_state,
723 &dsu_pmu->cpuhp_node);
724 if (rc)
725 return rc;
726
727 dsu_pmu->pmu = (struct pmu) {
728 .task_ctx_nr = perf_invalid_context,
729 .module = THIS_MODULE,
730 .pmu_enable = dsu_pmu_enable,
731 .pmu_disable = dsu_pmu_disable,
732 .event_init = dsu_pmu_event_init,
733 .add = dsu_pmu_add,
734 .del = dsu_pmu_del,
735 .start = dsu_pmu_start,
736 .stop = dsu_pmu_stop,
737 .read = dsu_pmu_read,
738
739 .attr_groups = dsu_pmu_attr_groups,
740 };
741
742 rc = perf_pmu_register(&dsu_pmu->pmu, name, -1);
743 if (rc) {
744 cpuhp_state_remove_instance(dsu_pmu_cpuhp_state,
745 &dsu_pmu->cpuhp_node);
746 irq_set_affinity_hint(dsu_pmu->irq, NULL);
747 }
748
749 return rc;
750}
751
752static int dsu_pmu_device_remove(struct platform_device *pdev)
753{
754 struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev);
755
756 perf_pmu_unregister(&dsu_pmu->pmu);
757 cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
758 irq_set_affinity_hint(dsu_pmu->irq, NULL);
759
760 return 0;
761}
762
763static const struct of_device_id dsu_pmu_of_match[] = {
764 { .compatible = "arm,dsu-pmu", },
765 {},
766};
767
768static struct platform_driver dsu_pmu_driver = {
769 .driver = {
770 .name = DRVNAME,
771 .of_match_table = of_match_ptr(dsu_pmu_of_match),
772 },
773 .probe = dsu_pmu_device_probe,
774 .remove = dsu_pmu_device_remove,
775};
776
777static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
778{
779 struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
780 cpuhp_node);
781
782 if (!cpumask_test_cpu(cpu, &dsu_pmu->associated_cpus))
783 return 0;
784
785 /* If the PMU is already managed, there is nothing to do */
786 if (!cpumask_empty(&dsu_pmu->active_cpu))
787 return 0;
788
789 dsu_pmu_init_pmu(dsu_pmu);
790 dsu_pmu_set_active_cpu(cpu, dsu_pmu);
791
792 return 0;
793}
794
795static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
796{
797 int dst;
798 struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
799 cpuhp_node);
800
801 if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
802 return 0;
803
804 dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
805 /* If there are no active CPUs in the DSU, leave IRQ disabled */
806 if (dst >= nr_cpu_ids) {
807 irq_set_affinity_hint(dsu_pmu->irq, NULL);
808 return 0;
809 }
810
811 perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst);
812 dsu_pmu_set_active_cpu(dst, dsu_pmu);
813
814 return 0;
815}
816
817static int __init dsu_pmu_init(void)
818{
819 int ret;
820
821 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
822 DRVNAME,
823 dsu_pmu_cpu_online,
824 dsu_pmu_cpu_teardown);
825 if (ret < 0)
826 return ret;
827 dsu_pmu_cpuhp_state = ret;
828 return platform_driver_register(&dsu_pmu_driver);
829}
830
831static void __exit dsu_pmu_exit(void)
832{
833 platform_driver_unregister(&dsu_pmu_driver);
834 cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
835}
836
837module_init(dsu_pmu_init);
838module_exit(dsu_pmu_exit);
839
840MODULE_DEVICE_TABLE(of, dsu_pmu_of_match);
841MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit");
842MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>");
843MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 91b224eced18..46501cc79fd7 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -82,19 +82,10 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i)
82 return -EINVAL; 82 return -EINVAL;
83 } 83 }
84 84
85 /* Now look up the logical CPU number */ 85 cpu = of_cpu_node_to_id(dn);
86 for_each_possible_cpu(cpu) { 86 if (cpu < 0) {
87 struct device_node *cpu_dn;
88
89 cpu_dn = of_cpu_device_node_get(cpu);
90 of_node_put(cpu_dn);
91
92 if (dn == cpu_dn)
93 break;
94 }
95
96 if (cpu >= nr_cpu_ids) {
97 pr_warn("failed to find logical CPU for %s\n", dn->name); 87 pr_warn("failed to find logical CPU for %s\n", dn->name);
88 cpu = nr_cpu_ids;
98 } 89 }
99 90
100 of_node_put(dn); 91 of_node_put(dn);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 8ce262fc2561..51b40aecb776 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -1164,6 +1164,15 @@ static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
1164 struct arm_spe_pmu *spe_pmu; 1164 struct arm_spe_pmu *spe_pmu;
1165 struct device *dev = &pdev->dev; 1165 struct device *dev = &pdev->dev;
1166 1166
1167 /*
1168 * If kernelspace is unmapped when running at EL0, then the SPE
1169 * buffer will fault and prematurely terminate the AUX session.
1170 */
1171 if (arm64_kernel_unmapped_at_el0()) {
1172 dev_warn_once(dev, "profiling buffer inaccessible. Try passing \"kpti=off\" on the kernel command line\n");
1173 return -EPERM;
1174 }
1175
1167 spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL); 1176 spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
1168 if (!spe_pmu) { 1177 if (!spe_pmu) {
1169 dev_err(dev, "failed to allocate spe_pmu\n"); 1178 dev_err(dev, "failed to allocate spe_pmu\n");
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
new file mode 100644
index 000000000000..942afbd544b7
--- /dev/null
+++ b/include/linux/arm_sdei.h
@@ -0,0 +1,79 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2017 Arm Ltd.
3#ifndef __LINUX_ARM_SDEI_H
4#define __LINUX_ARM_SDEI_H
5
6#include <uapi/linux/arm_sdei.h>
7
8enum sdei_conduit_types {
9 CONDUIT_INVALID = 0,
10 CONDUIT_SMC,
11 CONDUIT_HVC,
12};
13
14#include <asm/sdei.h>
15
16/* Arch code should override this to set the entry point from firmware... */
17#ifndef sdei_arch_get_entry_point
18#define sdei_arch_get_entry_point(conduit) (0)
19#endif
20
21/*
22 * When an event occurs sdei_event_handler() will call a user-provided callback
23 * like this in NMI context on the CPU that received the event.
24 */
25typedef int (sdei_event_callback)(u32 event, struct pt_regs *regs, void *arg);
26
27/*
28 * Register your callback to claim an event. The event must be described
29 * by firmware.
30 */
31int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg);
32
33/*
34 * Calls to sdei_event_unregister() may return EINPROGRESS. Keep calling
35 * it until it succeeds.
36 */
37int sdei_event_unregister(u32 event_num);
38
39int sdei_event_enable(u32 event_num);
40int sdei_event_disable(u32 event_num);
41
42#ifdef CONFIG_ARM_SDE_INTERFACE
43/* For use by arch code when CPU hotplug notifiers are not appropriate. */
44int sdei_mask_local_cpu(void);
45int sdei_unmask_local_cpu(void);
46#else
47static inline int sdei_mask_local_cpu(void) { return 0; }
48static inline int sdei_unmask_local_cpu(void) { return 0; }
49#endif /* CONFIG_ARM_SDE_INTERFACE */
50
51
52/*
53 * This struct represents an event that has been registered. The driver
54 * maintains a list of all events, and which ones are registered. (Private
55 * events have one entry in the list, but are registered on each CPU).
56 * A pointer to this struct is passed to firmware, and back to the event
57 * handler. The event handler can then use this to invoke the registered
58 * callback, without having to walk the list.
59 *
60 * For CPU private events, this structure is per-cpu.
61 */
62struct sdei_registered_event {
63 /* For use by arch code: */
64 struct pt_regs interrupted_regs;
65
66 sdei_event_callback *callback;
67 void *callback_arg;
68 u32 event_num;
69 u8 priority;
70};
71
72/* The arch code entry point should then call this when an event arrives. */
73int notrace sdei_event_handler(struct pt_regs *regs,
74 struct sdei_registered_event *arg);
75
76/* arch code may use this to retrieve the extra registers. */
77int sdei_api_event_context(u32 query, u64 *result);
78
79#endif /* __LINUX_ARM_SDEI_H */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 1a32e558eb11..2c787c5cad3d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -109,6 +109,7 @@ enum cpuhp_state {
109 CPUHP_AP_PERF_XTENSA_STARTING, 109 CPUHP_AP_PERF_XTENSA_STARTING,
110 CPUHP_AP_PERF_METAG_STARTING, 110 CPUHP_AP_PERF_METAG_STARTING,
111 CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, 111 CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
112 CPUHP_AP_ARM_SDEI_STARTING,
112 CPUHP_AP_ARM_VFP_STARTING, 113 CPUHP_AP_ARM_VFP_STARTING,
113 CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, 114 CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
114 CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, 115 CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 8f7788d23b57..871f9e21810c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -257,22 +257,30 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
257{return 0;} 257{return 0;}
258#endif 258#endif
259 259
260#define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \ 260#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, is_retention) \
261({ \ 261({ \
262 int __ret; \ 262 int __ret = 0; \
263 \ 263 \
264 if (!idx) { \ 264 if (!idx) { \
265 cpu_do_idle(); \ 265 cpu_do_idle(); \
266 return idx; \ 266 return idx; \
267 } \ 267 } \
268 \ 268 \
269 __ret = cpu_pm_enter(); \ 269 if (!is_retention) \
270 if (!__ret) { \ 270 __ret = cpu_pm_enter(); \
271 __ret = low_level_idle_enter(idx); \ 271 if (!__ret) { \
272 cpu_pm_exit(); \ 272 __ret = low_level_idle_enter(idx); \
273 } \ 273 if (!is_retention) \
274 \ 274 cpu_pm_exit(); \
275 __ret ? -1 : idx; \ 275 } \
276 \
277 __ret ? -1 : idx; \
276}) 278})
277 279
280#define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \
281 __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 0)
282
283#define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx) \
284 __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 1)
285
278#endif /* _LINUX_CPUIDLE_H */ 286#endif /* _LINUX_CPUIDLE_H */
diff --git a/include/linux/of.h b/include/linux/of.h
index d3dea1d1e3a9..173102dafb07 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -544,6 +544,8 @@ const char *of_prop_next_string(struct property *prop, const char *cur);
544 544
545bool of_console_check(struct device_node *dn, char *name, int index); 545bool of_console_check(struct device_node *dn, char *name, int index);
546 546
547extern int of_cpu_node_to_id(struct device_node *np);
548
547#else /* CONFIG_OF */ 549#else /* CONFIG_OF */
548 550
549static inline void of_core_init(void) 551static inline void of_core_init(void)
@@ -916,6 +918,11 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag
916{ 918{
917} 919}
918 920
921static inline int of_cpu_node_to_id(struct device_node *np)
922{
923 return -ENODEV;
924}
925
919#define of_match_ptr(_ptr) NULL 926#define of_match_ptr(_ptr) NULL
920#define of_match_node(_matches, _node) NULL 927#define of_match_node(_matches, _node) NULL
921#endif /* CONFIG_OF */ 928#endif /* CONFIG_OF */
diff --git a/include/linux/psci.h b/include/linux/psci.h
index bdea1cb5e1db..f724fd8c78e8 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -26,6 +26,7 @@ int psci_cpu_init_idle(unsigned int cpu);
26int psci_cpu_suspend_enter(unsigned long index); 26int psci_cpu_suspend_enter(unsigned long index);
27 27
28struct psci_operations { 28struct psci_operations {
29 u32 (*get_version)(void);
29 int (*cpu_suspend)(u32 state, unsigned long entry_point); 30 int (*cpu_suspend)(u32 state, unsigned long entry_point);
30 int (*cpu_off)(u32 state); 31 int (*cpu_off)(u32 state);
31 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); 32 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
@@ -46,10 +47,11 @@ static inline int psci_dt_init(void) { return 0; }
46#if defined(CONFIG_ARM_PSCI_FW) && defined(CONFIG_ACPI) 47#if defined(CONFIG_ARM_PSCI_FW) && defined(CONFIG_ACPI)
47int __init psci_acpi_init(void); 48int __init psci_acpi_init(void);
48bool __init acpi_psci_present(void); 49bool __init acpi_psci_present(void);
49bool __init acpi_psci_use_hvc(void); 50bool acpi_psci_use_hvc(void);
50#else 51#else
51static inline int psci_acpi_init(void) { return 0; } 52static inline int psci_acpi_init(void) { return 0; }
52static inline bool acpi_psci_present(void) { return false; } 53static inline bool acpi_psci_present(void) { return false; }
54static inline bool acpi_psci_use_hvc(void) {return false; }
53#endif 55#endif
54 56
55#endif /* __LINUX_PSCI_H */ 57#endif /* __LINUX_PSCI_H */
diff --git a/include/uapi/linux/arm_sdei.h b/include/uapi/linux/arm_sdei.h
new file mode 100644
index 000000000000..af0630ba5437
--- /dev/null
+++ b/include/uapi/linux/arm_sdei.h
@@ -0,0 +1,73 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/* Copyright (C) 2017 Arm Ltd. */
3#ifndef _UAPI_LINUX_ARM_SDEI_H
4#define _UAPI_LINUX_ARM_SDEI_H
5
6#define SDEI_1_0_FN_BASE 0xC4000020
7#define SDEI_1_0_MASK 0xFFFFFFE0
8#define SDEI_1_0_FN(n) (SDEI_1_0_FN_BASE + (n))
9
10#define SDEI_1_0_FN_SDEI_VERSION SDEI_1_0_FN(0x00)
11#define SDEI_1_0_FN_SDEI_EVENT_REGISTER SDEI_1_0_FN(0x01)
12#define SDEI_1_0_FN_SDEI_EVENT_ENABLE SDEI_1_0_FN(0x02)
13#define SDEI_1_0_FN_SDEI_EVENT_DISABLE SDEI_1_0_FN(0x03)
14#define SDEI_1_0_FN_SDEI_EVENT_CONTEXT SDEI_1_0_FN(0x04)
15#define SDEI_1_0_FN_SDEI_EVENT_COMPLETE SDEI_1_0_FN(0x05)
16#define SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME SDEI_1_0_FN(0x06)
17#define SDEI_1_0_FN_SDEI_EVENT_UNREGISTER SDEI_1_0_FN(0x07)
18#define SDEI_1_0_FN_SDEI_EVENT_STATUS SDEI_1_0_FN(0x08)
19#define SDEI_1_0_FN_SDEI_EVENT_GET_INFO SDEI_1_0_FN(0x09)
20#define SDEI_1_0_FN_SDEI_EVENT_ROUTING_SET SDEI_1_0_FN(0x0A)
21#define SDEI_1_0_FN_SDEI_PE_MASK SDEI_1_0_FN(0x0B)
22#define SDEI_1_0_FN_SDEI_PE_UNMASK SDEI_1_0_FN(0x0C)
23#define SDEI_1_0_FN_SDEI_INTERRUPT_BIND SDEI_1_0_FN(0x0D)
24#define SDEI_1_0_FN_SDEI_INTERRUPT_RELEASE SDEI_1_0_FN(0x0E)
25#define SDEI_1_0_FN_SDEI_PRIVATE_RESET SDEI_1_0_FN(0x11)
26#define SDEI_1_0_FN_SDEI_SHARED_RESET SDEI_1_0_FN(0x12)
27
28#define SDEI_VERSION_MAJOR_SHIFT 48
29#define SDEI_VERSION_MAJOR_MASK 0x7fff
30#define SDEI_VERSION_MINOR_SHIFT 32
31#define SDEI_VERSION_MINOR_MASK 0xffff
32#define SDEI_VERSION_VENDOR_SHIFT 0
33#define SDEI_VERSION_VENDOR_MASK 0xffffffff
34
35#define SDEI_VERSION_MAJOR(x) (x>>SDEI_VERSION_MAJOR_SHIFT & SDEI_VERSION_MAJOR_MASK)
36#define SDEI_VERSION_MINOR(x) (x>>SDEI_VERSION_MINOR_SHIFT & SDEI_VERSION_MINOR_MASK)
37#define SDEI_VERSION_VENDOR(x) (x>>SDEI_VERSION_VENDOR_SHIFT & SDEI_VERSION_VENDOR_MASK)
38
39/* SDEI return values */
40#define SDEI_SUCCESS 0
41#define SDEI_NOT_SUPPORTED -1
42#define SDEI_INVALID_PARAMETERS -2
43#define SDEI_DENIED -3
44#define SDEI_PENDING -5
45#define SDEI_OUT_OF_RESOURCE -10
46
47/* EVENT_REGISTER flags */
48#define SDEI_EVENT_REGISTER_RM_ANY 0
49#define SDEI_EVENT_REGISTER_RM_PE 1
50
51/* EVENT_STATUS return value bits */
52#define SDEI_EVENT_STATUS_RUNNING 2
53#define SDEI_EVENT_STATUS_ENABLED 1
54#define SDEI_EVENT_STATUS_REGISTERED 0
55
56/* EVENT_COMPLETE status values */
57#define SDEI_EV_HANDLED 0
58#define SDEI_EV_FAILED 1
59
60/* GET_INFO values */
61#define SDEI_EVENT_INFO_EV_TYPE 0
62#define SDEI_EVENT_INFO_EV_SIGNALED 1
63#define SDEI_EVENT_INFO_EV_PRIORITY 2
64#define SDEI_EVENT_INFO_EV_ROUTING_MODE 3
65#define SDEI_EVENT_INFO_EV_ROUTING_AFF 4
66
67/* and their results */
68#define SDEI_EVENT_TYPE_PRIVATE 0
69#define SDEI_EVENT_TYPE_SHARED 1
70#define SDEI_EVENT_PRIORITY_NORMAL 0
71#define SDEI_EVENT_PRIORITY_CRITICAL 1
72
73#endif /* _UAPI_LINUX_ARM_SDEI_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d99fe3fdec8a..02f7d6e57d9e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4913,6 +4913,7 @@ void perf_event_update_userpage(struct perf_event *event)
4913unlock: 4913unlock:
4914 rcu_read_unlock(); 4914 rcu_read_unlock();
4915} 4915}
4916EXPORT_SYMBOL_GPL(perf_event_update_userpage);
4916 4917
4917static int perf_mmap_fault(struct vm_fault *vmf) 4918static int perf_mmap_fault(struct vm_fault *vmf)
4918{ 4919{
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 2e43f9d42bd5..08464b2fba1d 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -53,8 +53,8 @@
53__asm__(".arch_extension virt"); 53__asm__(".arch_extension virt");
54#endif 54#endif
55 55
56DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
56static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); 57static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
57static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
58 58
59/* Per-CPU variable containing the currently running vcpu. */ 59/* Per-CPU variable containing the currently running vcpu. */
60static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); 60static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
@@ -354,7 +354,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
354 } 354 }
355 355
356 vcpu->cpu = cpu; 356 vcpu->cpu = cpu;
357 vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); 357 vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state);
358 358
359 kvm_arm_set_running_vcpu(vcpu); 359 kvm_arm_set_running_vcpu(vcpu);
360 kvm_vgic_load(vcpu); 360 kvm_vgic_load(vcpu);
@@ -509,7 +509,7 @@ static void update_vttbr(struct kvm *kvm)
509 pgd_phys = virt_to_phys(kvm->arch.pgd); 509 pgd_phys = virt_to_phys(kvm->arch.pgd);
510 BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); 510 BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
511 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); 511 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
512 kvm->arch.vttbr = pgd_phys | vmid; 512 kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
513 513
514 spin_unlock(&kvm_vmid_lock); 514 spin_unlock(&kvm_vmid_lock);
515} 515}
@@ -704,9 +704,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
704 */ 704 */
705 trace_kvm_entry(*vcpu_pc(vcpu)); 705 trace_kvm_entry(*vcpu_pc(vcpu));
706 guest_enter_irqoff(); 706 guest_enter_irqoff();
707 if (has_vhe())
708 kvm_arm_vhe_guest_enter();
707 709
708 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); 710 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
709 711
712 if (has_vhe())
713 kvm_arm_vhe_guest_exit();
710 vcpu->mode = OUTSIDE_GUEST_MODE; 714 vcpu->mode = OUTSIDE_GUEST_MODE;
711 vcpu->stat.exits++; 715 vcpu->stat.exits++;
712 /* 716 /*
@@ -759,6 +763,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
759 guest_exit(); 763 guest_exit();
760 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 764 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
761 765
766 /* Exit types that need handling before we can be preempted */
767 handle_exit_early(vcpu, run, ret);
768
762 preempt_enable(); 769 preempt_enable();
763 770
764 ret = handle_exit(vcpu, run, ret); 771 ret = handle_exit(vcpu, run, ret);
@@ -1158,7 +1165,7 @@ static void cpu_init_hyp_mode(void *dummy)
1158 pgd_ptr = kvm_mmu_get_httbr(); 1165 pgd_ptr = kvm_mmu_get_httbr();
1159 stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); 1166 stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
1160 hyp_stack_ptr = stack_page + PAGE_SIZE; 1167 hyp_stack_ptr = stack_page + PAGE_SIZE;
1161 vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); 1168 vector_ptr = (unsigned long)kvm_get_hyp_vector();
1162 1169
1163 __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); 1170 __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
1164 __cpu_init_stage2(); 1171 __cpu_init_stage2();
@@ -1272,19 +1279,8 @@ static inline void hyp_cpu_pm_exit(void)
1272} 1279}
1273#endif 1280#endif
1274 1281
1275static void teardown_common_resources(void)
1276{
1277 free_percpu(kvm_host_cpu_state);
1278}
1279
1280static int init_common_resources(void) 1282static int init_common_resources(void)
1281{ 1283{
1282 kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
1283 if (!kvm_host_cpu_state) {
1284 kvm_err("Cannot allocate host CPU state\n");
1285 return -ENOMEM;
1286 }
1287
1288 /* set size of VMID supported by CPU */ 1284 /* set size of VMID supported by CPU */
1289 kvm_vmid_bits = kvm_get_vmid_bits(); 1285 kvm_vmid_bits = kvm_get_vmid_bits();
1290 kvm_info("%d-bit VMID\n", kvm_vmid_bits); 1286 kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@@ -1403,6 +1399,12 @@ static int init_hyp_mode(void)
1403 goto out_err; 1399 goto out_err;
1404 } 1400 }
1405 1401
1402 err = kvm_map_vectors();
1403 if (err) {
1404 kvm_err("Cannot map vectors\n");
1405 goto out_err;
1406 }
1407
1406 /* 1408 /*
1407 * Map the Hyp stack pages 1409 * Map the Hyp stack pages
1408 */ 1410 */
@@ -1420,7 +1422,7 @@ static int init_hyp_mode(void)
1420 for_each_possible_cpu(cpu) { 1422 for_each_possible_cpu(cpu) {
1421 kvm_cpu_context_t *cpu_ctxt; 1423 kvm_cpu_context_t *cpu_ctxt;
1422 1424
1423 cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); 1425 cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
1424 err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); 1426 err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
1425 1427
1426 if (err) { 1428 if (err) {
@@ -1544,7 +1546,6 @@ out_hyp:
1544 if (!in_hyp_mode) 1546 if (!in_hyp_mode)
1545 teardown_hyp_mode(); 1547 teardown_hyp_mode();
1546out_err: 1548out_err:
1547 teardown_common_resources();
1548 return err; 1549 return err;
1549} 1550}
1550 1551
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 9dea96380339..f8eaf86b740a 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -621,7 +621,7 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
621 return 0; 621 return 0;
622} 622}
623 623
624static int __create_hyp_mappings(pgd_t *pgdp, 624static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
625 unsigned long start, unsigned long end, 625 unsigned long start, unsigned long end,
626 unsigned long pfn, pgprot_t prot) 626 unsigned long pfn, pgprot_t prot)
627{ 627{
@@ -634,7 +634,7 @@ static int __create_hyp_mappings(pgd_t *pgdp,
634 addr = start & PAGE_MASK; 634 addr = start & PAGE_MASK;
635 end = PAGE_ALIGN(end); 635 end = PAGE_ALIGN(end);
636 do { 636 do {
637 pgd = pgdp + pgd_index(addr); 637 pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
638 638
639 if (pgd_none(*pgd)) { 639 if (pgd_none(*pgd)) {
640 pud = pud_alloc_one(NULL, addr); 640 pud = pud_alloc_one(NULL, addr);
@@ -697,8 +697,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
697 int err; 697 int err;
698 698
699 phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); 699 phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
700 err = __create_hyp_mappings(hyp_pgd, virt_addr, 700 err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
701 virt_addr + PAGE_SIZE, 701 virt_addr, virt_addr + PAGE_SIZE,
702 __phys_to_pfn(phys_addr), 702 __phys_to_pfn(phys_addr),
703 prot); 703 prot);
704 if (err) 704 if (err)
@@ -729,7 +729,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
729 if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) 729 if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
730 return -EINVAL; 730 return -EINVAL;
731 731
732 return __create_hyp_mappings(hyp_pgd, start, end, 732 return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
733 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); 733 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
734} 734}
735 735
@@ -1735,7 +1735,7 @@ static int kvm_map_idmap_text(pgd_t *pgd)
1735 int err; 1735 int err;
1736 1736
1737 /* Create the idmap in the boot page tables */ 1737 /* Create the idmap in the boot page tables */
1738 err = __create_hyp_mappings(pgd, 1738 err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
1739 hyp_idmap_start, hyp_idmap_end, 1739 hyp_idmap_start, hyp_idmap_end,
1740 __phys_to_pfn(hyp_idmap_start), 1740 __phys_to_pfn(hyp_idmap_start),
1741 PAGE_HYP_EXEC); 1741 PAGE_HYP_EXEC);