author	Paolo Bonzini <pbonzini@redhat.com>	2018-10-19 09:24:24 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2018-10-19 09:24:24 -0400
commit	e42b4a507efa19a90c63e7968c93c4f82d3bc805 (patch)
tree	cc7bf34323b26d3f53d4a9ec4ca1b5c52bc361cd
parent	1e58e5e59148916fa43444a406335a990783fb78 (diff)
parent	e4e11cc0f81ee7be17d6f6fb96128a6d51c0e838 (diff)
Merge tag 'kvmarm-for-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm updates for 4.20

- Improved guest IPA space support (32 to 52 bits)
- RAS event delivery for 32bit
- PMU fixes
- Guest entry hardening
- Various cleanups
-rw-r--r--  Documentation/virtual/kvm/api.txt  31
-rw-r--r--  MAINTAINERS  11
-rw-r--r--  Makefile  2
-rw-r--r--  arch/arm/include/asm/kvm_arm.h  3
-rw-r--r--  arch/arm/include/asm/kvm_host.h  13
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h  15
-rw-r--r--  arch/arm/include/asm/stage2_pgtable.h  54
-rw-r--r--  arch/arm64/include/asm/cpufeature.h  20
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h  155
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h  3
-rw-r--r--  arch/arm64/include/asm/kvm_host.h  18
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h  10
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h  42
-rw-r--r--  arch/arm64/include/asm/ptrace.h  3
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable-nopmd.h  42
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable-nopud.h  39
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable.h  236
-rw-r--r--  arch/arm64/kvm/guest.c  6
-rw-r--r--  arch/arm64/kvm/handle_exit.c  7
-rw-r--r--  arch/arm64/kvm/hyp/Makefile  1
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S  16
-rw-r--r--  arch/arm64/kvm/hyp/s2-setup.c  90
-rw-r--r--  arch/arm64/kvm/hyp/switch.c  4
-rw-r--r--  arch/arm64/kvm/hyp/sysreg-sr.c  19
-rw-r--r--  arch/arm64/kvm/hyp/tlb.c  4
-rw-r--r--  arch/arm64/kvm/reset.c  108
-rw-r--r--  arch/x86/include/asm/fixmap.h  10
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h  7
-rw-r--r--  arch/x86/include/asm/pgtable_64.h  3
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.h  17
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c  27
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c  53
-rw-r--r--  arch/x86/kernel/head64.c  20
-rw-r--r--  arch/x86/kernel/head_64.S  16
-rw-r--r--  arch/x86/kernel/kvmclock.c  52
-rw-r--r--  arch/x86/kernel/paravirt.c  4
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S  19
-rw-r--r--  arch/x86/mm/init.c  4
-rw-r--r--  arch/x86/mm/mem_encrypt.c  24
-rw-r--r--  arch/x86/mm/pgtable.c  9
-rw-r--r--  arch/x86/xen/mmu_pv.c  8
-rw-r--r--  arch/x86/xen/pmu.c  2
-rw-r--r--  block/bio.c  2
-rw-r--r--  block/blk-core.c  4
-rw-r--r--  block/genhd.c  6
-rw-r--r--  block/partition-generic.c  6
-rw-r--r--  drivers/firmware/efi/Kconfig  9
-rw-r--r--  drivers/mfd/omap-usb-host.c  11
-rw-r--r--  drivers/pinctrl/intel/pinctrl-cannonlake.c  2
-rw-r--r--  drivers/pinctrl/intel/pinctrl-intel.c  111
-rw-r--r--  drivers/xen/grant-table.c  27
-rw-r--r--  include/linux/genhd.h  5
-rw-r--r--  include/linux/irqchip/arm-gic-v3.h  5
-rw-r--r--  include/linux/mfd/da9063/pdata.h  16
-rw-r--r--  include/uapi/linux/kvm.h  10
-rw-r--r--  tools/lib/bpf/Build  2
-rw-r--r--  tools/lib/bpf/libbpf.c  20
-rw-r--r--  tools/lib/bpf/str_error.c  18
-rw-r--r--  tools/lib/bpf/str_error.h  6
-rw-r--r--  tools/perf/Documentation/Makefile  2
-rw-r--r--  virt/kvm/arm/arm.c  26
-rw-r--r--  virt/kvm/arm/mmu.c  128
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c  36
-rw-r--r--  virt/kvm/arm/vgic/vgic-kvm-device.c  2
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio-v3.c  2
65 files changed, 1118 insertions, 565 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 07e87a7c665d..cd209f7730af 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -123,6 +123,37 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
123flag KVM_VM_MIPS_VZ. 123flag KVM_VM_MIPS_VZ.
124 124
125 125
126On arm64, the physical address size for a VM (IPA Size limit) is limited
127to 40bits by default. The limit can be configured if the host supports the
128extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
129KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
130identifier, where IPA_Bits is the maximum width of any physical
131address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
132machine type identifier.
133
134e.g, to configure a guest to use 48bit physical address size :
135
136 vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
137
138The requested size (IPA_Bits) must be :
139 0 - Implies default size, 40bits (for backward compatibility)
140
141 or
142
143 N - Implies N bits, where N is a positive integer such that,
144 32 <= N <= Host_IPA_Limit
145
146Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
147is dependent on the CPU capability and the kernel configuration. The limit can
148be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
149ioctl() at run-time.
150
151Please note that configuring the IPA size does not affect the capability
152exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
153size of the address translated by the stage2 level (guest physical to
154host physical address translations).
155
156
1264.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST 1574.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
127 158
128Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST 159Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
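
A minimal userspace sketch of the sequence the documentation above describes: query Host_IPA_Limit first, then create the VM with a 48-bit IPA only when the host allows it. The /dev/kvm path and the error handling are illustrative assumptions; KVM_VM_TYPE_ARM_IPA_SIZE and KVM_CAP_ARM_VM_IPA_SIZE come from the uapi header updated by this series.

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int vm_fd;
		int dev_fd = open("/dev/kvm", O_RDWR);
		/* 0 means the extension is absent and only the 40-bit default exists */
		int ipa_max = ioctl(dev_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);

		if (ipa_max >= 48)
			vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
		else
			vm_fd = ioctl(dev_fd, KVM_CREATE_VM, 0); /* default 40-bit IPA */

		return vm_fd < 0;
	}
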
diff --git a/MAINTAINERS b/MAINTAINERS
index 1610fb26bdac..86e019c7b0fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12260,6 +12260,7 @@ F: Documentation/networking/rds.txt
12260 12260
12261RDT - RESOURCE ALLOCATION 12261RDT - RESOURCE ALLOCATION
12262M: Fenghua Yu <fenghua.yu@intel.com> 12262M: Fenghua Yu <fenghua.yu@intel.com>
12263M: Reinette Chatre <reinette.chatre@intel.com>
12263L: linux-kernel@vger.kernel.org 12264L: linux-kernel@vger.kernel.org
12264S: Supported 12265S: Supported
12265F: arch/x86/kernel/cpu/intel_rdt* 12266F: arch/x86/kernel/cpu/intel_rdt*
@@ -15924,6 +15925,7 @@ F: net/x25/
15924X86 ARCHITECTURE (32-BIT AND 64-BIT) 15925X86 ARCHITECTURE (32-BIT AND 64-BIT)
15925M: Thomas Gleixner <tglx@linutronix.de> 15926M: Thomas Gleixner <tglx@linutronix.de>
15926M: Ingo Molnar <mingo@redhat.com> 15927M: Ingo Molnar <mingo@redhat.com>
15928M: Borislav Petkov <bp@alien8.de>
15927R: "H. Peter Anvin" <hpa@zytor.com> 15929R: "H. Peter Anvin" <hpa@zytor.com>
15928M: x86@kernel.org 15930M: x86@kernel.org
15929L: linux-kernel@vger.kernel.org 15931L: linux-kernel@vger.kernel.org
@@ -15952,6 +15954,15 @@ M: Borislav Petkov <bp@alien8.de>
15952S: Maintained 15954S: Maintained
15953F: arch/x86/kernel/cpu/microcode/* 15955F: arch/x86/kernel/cpu/microcode/*
15954 15956
15957X86 MM
15958M: Dave Hansen <dave.hansen@linux.intel.com>
15959M: Andy Lutomirski <luto@kernel.org>
15960M: Peter Zijlstra <peterz@infradead.org>
15961L: linux-kernel@vger.kernel.org
15962T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
15963S: Maintained
15964F: arch/x86/mm/
15965
15955X86 PLATFORM DRIVERS 15966X86 PLATFORM DRIVERS
15956M: Darren Hart <dvhart@infradead.org> 15967M: Darren Hart <dvhart@infradead.org>
15957M: Andy Shevchenko <andy@infradead.org> 15968M: Andy Shevchenko <andy@infradead.org>
diff --git a/Makefile b/Makefile
index f03a1e062503..0c90c4354979 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 19 3PATCHLEVEL = 19
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc4 5EXTRAVERSION = -rc5
6NAME = Merciless Moray 6NAME = Merciless Moray
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 3ab8b3781bfe..c3f1f9b304b7 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -133,8 +133,7 @@
133 * space. 133 * space.
134 */ 134 */
135#define KVM_PHYS_SHIFT (40) 135#define KVM_PHYS_SHIFT (40)
136#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) 136
137#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL))
138#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) 137#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
139 138
140/* Virtualization Translation Control Register (VTCR) bits */ 139/* Virtualization Translation Control Register (VTCR) bits */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3ad482d2f1eb..5ca5d9af0c26 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void)
273 kvm_call_hyp(__init_stage2_translation); 273 kvm_call_hyp(__init_stage2_translation);
274} 274}
275 275
276static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) 276static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
277{ 277{
278 return 0; 278 return 0;
279} 279}
@@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
354struct kvm *kvm_arch_alloc_vm(void); 354struct kvm *kvm_arch_alloc_vm(void);
355void kvm_arch_free_vm(struct kvm *kvm); 355void kvm_arch_free_vm(struct kvm *kvm);
356 356
357static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
358{
359 /*
360 * On 32bit ARM, VMs get a static 40bit IPA stage2 setup,
361 * so any non-zero value used as type is illegal.
362 */
363 if (type)
364 return -EINVAL;
365 return 0;
366}
367
357#endif /* __ARM_KVM_HOST_H__ */ 368#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 265ea9cf7df7..5ad1a54f98dc 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -35,16 +35,12 @@
35 addr; \ 35 addr; \
36 }) 36 })
37 37
38/*
39 * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
40 */
41#define KVM_MMU_CACHE_MIN_PAGES 2
42
43#ifndef __ASSEMBLY__ 38#ifndef __ASSEMBLY__
44 39
45#include <linux/highmem.h> 40#include <linux/highmem.h>
46#include <asm/cacheflush.h> 41#include <asm/cacheflush.h>
47#include <asm/cputype.h> 42#include <asm/cputype.h>
43#include <asm/kvm_arm.h>
48#include <asm/kvm_hyp.h> 44#include <asm/kvm_hyp.h>
49#include <asm/pgalloc.h> 45#include <asm/pgalloc.h>
50#include <asm/stage2_pgtable.h> 46#include <asm/stage2_pgtable.h>
@@ -52,6 +48,13 @@
52/* Ensure compatibility with arm64 */ 48/* Ensure compatibility with arm64 */
53#define VA_BITS 32 49#define VA_BITS 32
54 50
51#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT
52#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm))
53#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL)
54#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK
55
56#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t))
57
55int create_hyp_mappings(void *from, void *to, pgprot_t prot); 58int create_hyp_mappings(void *from, void *to, pgprot_t prot);
56int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, 59int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
57 void __iomem **kaddr, 60 void __iomem **kaddr,
@@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void)
355 358
356#define kvm_phys_to_vttbr(addr) (addr) 359#define kvm_phys_to_vttbr(addr) (addr)
357 360
361static inline void kvm_set_ipa_limit(void) {}
362
358#endif /* !__ASSEMBLY__ */ 363#endif /* !__ASSEMBLY__ */
359 364
360#endif /* __ARM_KVM_MMU_H__ */ 365#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index 460d616bb2d6..f6a7ea805232 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -19,43 +19,53 @@
19#ifndef __ARM_S2_PGTABLE_H_ 19#ifndef __ARM_S2_PGTABLE_H_
20#define __ARM_S2_PGTABLE_H_ 20#define __ARM_S2_PGTABLE_H_
21 21
22#define stage2_pgd_none(pgd) pgd_none(pgd) 22/*
23#define stage2_pgd_clear(pgd) pgd_clear(pgd) 23 * kvm_mmu_cache_min_pages() is the number of pages required
24#define stage2_pgd_present(pgd) pgd_present(pgd) 24 * to install a stage-2 translation. We pre-allocate the entry
25#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) 25 * level table at VM creation. Since we have a 3 level page-table,
26#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) 26 * we need only two pages to add a new mapping.
27#define stage2_pud_free(pud) pud_free(NULL, pud) 27 */
28 28#define kvm_mmu_cache_min_pages(kvm) 2
29#define stage2_pud_none(pud) pud_none(pud) 29
30#define stage2_pud_clear(pud) pud_clear(pud) 30#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
31#define stage2_pud_present(pud) pud_present(pud) 31#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
32#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) 32#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
33#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) 33#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
34#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) 34#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
35 35#define stage2_pud_free(kvm, pud) pud_free(NULL, pud)
36#define stage2_pud_huge(pud) pud_huge(pud) 36
37#define stage2_pud_none(kvm, pud) pud_none(pud)
38#define stage2_pud_clear(kvm, pud) pud_clear(pud)
39#define stage2_pud_present(kvm, pud) pud_present(pud)
40#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
41#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
42#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd)
43
44#define stage2_pud_huge(kvm, pud) pud_huge(pud)
37 45
38/* Open coded p*d_addr_end that can deal with 64bit addresses */ 46/* Open coded p*d_addr_end that can deal with 64bit addresses */
39static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) 47static inline phys_addr_t
48stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
40{ 49{
41 phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK; 50 phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
42 51
43 return (boundary - 1 < end - 1) ? boundary : end; 52 return (boundary - 1 < end - 1) ? boundary : end;
44} 53}
45 54
46#define stage2_pud_addr_end(addr, end) (end) 55#define stage2_pud_addr_end(kvm, addr, end) (end)
47 56
48static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) 57static inline phys_addr_t
58stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
49{ 59{
50 phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK; 60 phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
51 61
52 return (boundary - 1 < end - 1) ? boundary : end; 62 return (boundary - 1 < end - 1) ? boundary : end;
53} 63}
54 64
55#define stage2_pgd_index(addr) pgd_index(addr) 65#define stage2_pgd_index(kvm, addr) pgd_index(addr)
56 66
57#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) 67#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
58#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 68#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
59#define stage2_pud_table_empty(pudp) false 69#define stage2_pud_table_empty(kvm, pudp) false
60 70
61#endif /* __ARM_S2_PGTABLE_H_ */ 71#endif /* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1717ba1db35d..072cc1c970c2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -530,6 +530,26 @@ void arm64_set_ssbd_mitigation(bool state);
530static inline void arm64_set_ssbd_mitigation(bool state) {} 530static inline void arm64_set_ssbd_mitigation(bool state) {}
531#endif 531#endif
532 532
533static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
534{
535 switch (parange) {
536 case 0: return 32;
537 case 1: return 36;
538 case 2: return 40;
539 case 3: return 42;
540 case 4: return 44;
541 case 5: return 48;
542 case 6: return 52;
543 /*
544 * A future PE could use a value unknown to the kernel.
545 * However, by the "D10.1.4 Principles of the ID scheme
546 * for fields in ID registers", ARM DDI 0487C.a, any new
547 * value is guaranteed to be higher than what we know already.
548 * As a safe limit, we return the limit supported by the kernel.
549 */
550 default: return CONFIG_ARM64_PA_BITS;
551 }
552}
533#endif /* __ASSEMBLY__ */ 553#endif /* __ASSEMBLY__ */
534 554
535#endif 555#endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index aa45df752a16..6e324d1f1231 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
107#define VTCR_EL2_RES1 (1 << 31) 107#define VTCR_EL2_RES1 (1 << 31)
108#define VTCR_EL2_HD (1 << 22) 108#define VTCR_EL2_HD (1 << 22)
109#define VTCR_EL2_HA (1 << 21) 109#define VTCR_EL2_HA (1 << 21)
110#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
110#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK 111#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
111#define VTCR_EL2_TG0_MASK TCR_TG0_MASK 112#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
112#define VTCR_EL2_TG0_4K TCR_TG0_4K 113#define VTCR_EL2_TG0_4K TCR_TG0_4K
@@ -120,62 +121,149 @@
120#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA 121#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
121#define VTCR_EL2_SL0_SHIFT 6 122#define VTCR_EL2_SL0_SHIFT 6
122#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) 123#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
123#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT)
124#define VTCR_EL2_T0SZ_MASK 0x3f 124#define VTCR_EL2_T0SZ_MASK 0x3f
125#define VTCR_EL2_T0SZ_40B 24
126#define VTCR_EL2_VS_SHIFT 19 125#define VTCR_EL2_VS_SHIFT 19
127#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT) 126#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT)
128#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT) 127#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT)
129 128
129#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x)
130
130/* 131/*
131 * We configure the Stage-2 page tables to always restrict the IPA space to be 132 * We configure the Stage-2 page tables to always restrict the IPA space to be
132 * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are 133 * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
133 * not known to exist and will break with this configuration. 134 * not known to exist and will break with this configuration.
134 * 135 *
135 * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time 136 * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
136 * (see hyp-init.S).
137 * 137 *
138 * Note that when using 4K pages, we concatenate two first level page tables 138 * Note that when using 4K pages, we concatenate two first level page tables
139 * together. With 16K pages, we concatenate 16 first level page tables. 139 * together. With 16K pages, we concatenate 16 first level page tables.
140 * 140 *
141 * The magic numbers used for VTTBR_X in this patch can be found in Tables
142 * D4-23 and D4-25 in ARM DDI 0487A.b.
143 */ 141 */
144 142
145#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B
146#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ 143#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
147 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) 144 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
148 145
149#ifdef CONFIG_ARM64_64K_PAGES
150/* 146/*
151 * Stage2 translation configuration: 147 * VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
152 * 64kB pages (TG0 = 1) 148 * Interestingly, it depends on the page size.
153 * 2 level page tables (SL = 1) 149 * See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
150 *
151 * -----------------------------------------
152 * | Entry level | 4K | 16K/64K |
153 * ------------------------------------------
154 * | Level: 0 | 2 | - |
155 * ------------------------------------------
156 * | Level: 1 | 1 | 2 |
157 * ------------------------------------------
158 * | Level: 2 | 0 | 1 |
159 * ------------------------------------------
160 * | Level: 3 | - | 0 |
161 * ------------------------------------------
162 *
163 * The table roughly translates to :
164 *
165 * SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
166 *
167 * Where TGRAN_SL0_BASE is a magic number depending on the page size:
168 * TGRAN_SL0_BASE(4K) = 2
169 * TGRAN_SL0_BASE(16K) = 3
170 * TGRAN_SL0_BASE(64K) = 3
171 * provided we take care of ruling out the unsupported cases and
172 * Entry_Level = 4 - Number_of_levels.
173 *
154 */ 174 */
155#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1) 175#ifdef CONFIG_ARM64_64K_PAGES
156#define VTTBR_X_TGRAN_MAGIC 38 176
177#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K
178#define VTCR_EL2_TGRAN_SL0_BASE 3UL
179
157#elif defined(CONFIG_ARM64_16K_PAGES) 180#elif defined(CONFIG_ARM64_16K_PAGES)
158/* 181
159 * Stage2 translation configuration: 182#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K
160 * 16kB pages (TG0 = 2) 183#define VTCR_EL2_TGRAN_SL0_BASE 3UL
161 * 2 level page tables (SL = 1) 184
162 */
163#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
164#define VTTBR_X_TGRAN_MAGIC 42
165#else /* 4K */ 185#else /* 4K */
166/* 186
167 * Stage2 translation configuration: 187#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K
168 * 4kB pages (TG0 = 0) 188#define VTCR_EL2_TGRAN_SL0_BASE 2UL
169 * 3 level page tables (SL = 1) 189
170 */
171#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
172#define VTTBR_X_TGRAN_MAGIC 37
173#endif 190#endif
174 191
175#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) 192#define VTCR_EL2_LVLS_TO_SL0(levels) \
176#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) 193 ((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
194#define VTCR_EL2_SL0_TO_LVLS(sl0) \
195 ((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
196#define VTCR_EL2_LVLS(vtcr) \
197 VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
198
199#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
200#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
201
202/*
203 * ARM VMSAv8-64 defines an algorithm for finding the translation table
204 * descriptors in section D4.2.8 in ARM DDI 0487C.a.
205 *
206 * The algorithm defines the expectations on the translation table
207 * addresses for each level, based on PAGE_SIZE, entry level
208 * and the translation table size (T0SZ). The variable "x" in the
209 * algorithm determines the alignment of a table base address at a given
210 * level and thus determines the alignment of VTTBR:BADDR for stage2
211 * page table entry level.
212 * Since the number of bits resolved at the entry level could vary
213 * depending on the T0SZ, the value of "x" is defined based on a
214 * Magic constant for a given PAGE_SIZE and Entry Level. The
215 * intermediate levels must be always aligned to the PAGE_SIZE (i.e,
216 * x = PAGE_SHIFT).
217 *
218 * The value of "x" for entry level is calculated as :
219 * x = Magic_N - T0SZ
220 *
221 * where Magic_N is an integer depending on the page size and the entry
222 * level of the page table as below:
223 *
224 * --------------------------------------------
225 * | Entry level | 4K 16K 64K |
226 * --------------------------------------------
227 * | Level: 0 (4 levels) | 28 | - | - |
228 * --------------------------------------------
229 * | Level: 1 (3 levels) | 37 | 31 | 25 |
230 * --------------------------------------------
231 * | Level: 2 (2 levels) | 46 | 42 | 38 |
232 * --------------------------------------------
233 * | Level: 3 (1 level) | - | 53 | 51 |
234 * --------------------------------------------
235 *
236 * We have a magic formula for the Magic_N below:
237 *
238 * Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
239 *
240 * where Number_of_levels = (4 - Level). We are only interested in the
241 * value for Entry_Level for the stage2 page table.
242 *
243 * So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
244 *
245 * x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
246 * = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number of levels)
247 *
248 * Here is one way to explain the Magic Formula:
249 *
250 * x = log2(Size_of_Entry_Level_Table)
251 *
252 * Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
253 * PAGE_SHIFT bits in the PTE, we have :
254 *
255 * Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
256 * = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
257 * where n = number of levels, and since each pointer is 8bytes, we have:
258 *
259 * x = Bits_Entry_Level + 3
260 * = IPA_SHIFT - (PAGE_SHIFT - 3) * n
261 *
262 * The only constraint here is that, we have to find the number of page table
263 * levels for a given IPA size (which we do, see stage2_pt_levels())
264 */
265#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (PAGE_SHIFT - 3)))
177 266
178#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
179#define VTTBR_VMID_SHIFT (UL(48)) 267#define VTTBR_VMID_SHIFT (UL(48))
180#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) 268#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
181 269
@@ -223,6 +311,13 @@
223 311
224/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ 312/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
225#define HPFAR_MASK (~UL(0xf)) 313#define HPFAR_MASK (~UL(0xf))
314/*
315 * We have
316 * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
317 * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
318 */
319#define PAR_TO_HPFAR(par) \
320 (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
226 321
227#define kvm_arm_exception_type \ 322#define kvm_arm_exception_type \
228 {0, "IRQ" }, \ 323 {0, "IRQ" }, \
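
As a quick consistency check, the new generic macros reproduce the fixed configuration they replace (40-bit IPA, 4K pages, 3 levels, T0SZ = 24):

	SL0:     VTCR_EL2_LVLS_TO_SL0(3) = (2 - (4 - 3)) << 6 = 1 << VTCR_EL2_SL0_SHIFT,
	         i.e. the old VTCR_EL2_SL0_LVL1 encoding.
	VTTBR x: ARM64_VTTBR_X(40, 3) = 40 - 3 * (PAGE_SHIFT - 3) = 40 - 27 = 13,
	         i.e. the old VTTBR_X = Magic_N - T0SZ = 37 - 24.
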
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 102b5a5c47b6..aea01a09eb94 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,6 +30,7 @@
30#define ARM_EXCEPTION_IRQ 0 30#define ARM_EXCEPTION_IRQ 0
31#define ARM_EXCEPTION_EL1_SERROR 1 31#define ARM_EXCEPTION_EL1_SERROR 1
32#define ARM_EXCEPTION_TRAP 2 32#define ARM_EXCEPTION_TRAP 2
33#define ARM_EXCEPTION_IL 3
33/* The hyp-stub will return this for any kvm_call_hyp() call */ 34/* The hyp-stub will return this for any kvm_call_hyp() call */
34#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR 35#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
35 36
@@ -72,8 +73,6 @@ extern void __vgic_v3_init_lrs(void);
72 73
73extern u32 __kvm_get_mdcr_el2(void); 74extern u32 __kvm_get_mdcr_el2(void);
74 75
75extern u32 __init_stage2_translation(void);
76
77/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ 76/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
78#define __hyp_this_cpu_ptr(sym) \ 77#define __hyp_this_cpu_ptr(sym) \
79 ({ \ 78 ({ \
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3d6d7336f871..f84052f306af 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
53 53
54int __attribute_const__ kvm_target_cpu(void); 54int __attribute_const__ kvm_target_cpu(void);
55int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 55int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
56int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); 56int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
57void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); 57void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
58 58
59struct kvm_arch { 59struct kvm_arch {
@@ -61,11 +61,13 @@ struct kvm_arch {
61 u64 vmid_gen; 61 u64 vmid_gen;
62 u32 vmid; 62 u32 vmid;
63 63
64 /* 1-level 2nd stage table, protected by kvm->mmu_lock */ 64 /* stage2 entry level table */
65 pgd_t *pgd; 65 pgd_t *pgd;
66 66
67 /* VTTBR value associated with above pgd and vmid */ 67 /* VTTBR value associated with above pgd and vmid */
68 u64 vttbr; 68 u64 vttbr;
69 /* VTCR_EL2 value for this VM */
70 u64 vtcr;
69 71
70 /* The last vcpu id that ran on each physical CPU */ 72 /* The last vcpu id that ran on each physical CPU */
71 int __percpu *last_vcpu_ran; 73 int __percpu *last_vcpu_ran;
@@ -440,13 +442,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
440int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, 442int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
441 struct kvm_device_attr *attr); 443 struct kvm_device_attr *attr);
442 444
443static inline void __cpu_init_stage2(void) 445static inline void __cpu_init_stage2(void) {}
444{
445 u32 parange = kvm_call_hyp(__init_stage2_translation);
446
447 WARN_ONCE(parange < 40,
448 "PARange is %d bits, unsupported configuration!", parange);
449}
450 446
451/* Guest/host FPSIMD coordination helpers */ 447/* Guest/host FPSIMD coordination helpers */
452int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); 448int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -509,8 +505,12 @@ static inline int kvm_arm_have_ssbd(void)
509void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); 505void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
510void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); 506void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
511 507
508void kvm_set_ipa_limit(void);
509
512#define __KVM_HAVE_ARCH_VM_ALLOC 510#define __KVM_HAVE_ARCH_VM_ALLOC
513struct kvm *kvm_arch_alloc_vm(void); 511struct kvm *kvm_arch_alloc_vm(void);
514void kvm_arch_free_vm(struct kvm *kvm); 512void kvm_arch_free_vm(struct kvm *kvm);
515 513
514int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
515
516#endif /* __ARM64_KVM_HOST_H__ */ 516#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 384c34397619..23aca66767f9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -155,5 +155,15 @@ void deactivate_traps_vhe_put(void);
155u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); 155u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
156void __noreturn __hyp_do_panic(unsigned long, ...); 156void __noreturn __hyp_do_panic(unsigned long, ...);
157 157
158/*
159 * Must be called from hyp code running at EL2 with an updated VTTBR
160 * and interrupts disabled.
161 */
162static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
163{
164 write_sysreg(kvm->arch.vtcr, vtcr_el2);
165 write_sysreg(kvm->arch.vttbr, vttbr_el2);
166}
167
158#endif /* __ARM64_KVM_HYP_H__ */ 168#endif /* __ARM64_KVM_HYP_H__ */
159 169
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index d6fff7de5539..77b1af9e64db 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -141,8 +141,16 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
141 * We currently only support a 40bit IPA. 141 * We currently only support a 40bit IPA.
142 */ 142 */
143#define KVM_PHYS_SHIFT (40) 143#define KVM_PHYS_SHIFT (40)
144#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) 144
145#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) 145#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
146#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
147#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
148
149static inline bool kvm_page_empty(void *ptr)
150{
151 struct page *ptr_page = virt_to_page(ptr);
152 return page_count(ptr_page) == 1;
153}
146 154
147#include <asm/stage2_pgtable.h> 155#include <asm/stage2_pgtable.h>
148 156
@@ -238,12 +246,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
238 return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN); 246 return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
239} 247}
240 248
241static inline bool kvm_page_empty(void *ptr)
242{
243 struct page *ptr_page = virt_to_page(ptr);
244 return page_count(ptr_page) == 1;
245}
246
247#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) 249#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
248 250
249#ifdef __PAGETABLE_PMD_FOLDED 251#ifdef __PAGETABLE_PMD_FOLDED
@@ -517,5 +519,29 @@ static inline int hyp_map_aux_data(void)
517 519
518#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) 520#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
519 521
522/*
523 * Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
524 * With v8.2 LVA extensions, 'x' should be a minimum of 6 with
525 * 52bit IPS.
526 */
527static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
528{
529 int x = ARM64_VTTBR_X(ipa_shift, levels);
530
531 return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
532}
533
534static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
535{
536 unsigned int x = arm64_vttbr_x(ipa_shift, levels);
537
538 return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
539}
540
541static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
542{
543 return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
544}
545
520#endif /* __ASSEMBLY__ */ 546#endif /* __ASSEMBLY__ */
521#endif /* __ARM64_KVM_MMU_H__ */ 547#endif /* __ARM64_KVM_MMU_H__ */
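
Continuing the worked example above (40-bit IPA, 3 levels, 4K pages), and assuming PHYS_MASK_SHIFT = 48, i.e. CONFIG_ARM64_PA_BITS_52 disabled:

	arm64_vttbr_x(40, 3)    = 13  (already >= 6, so no LVA clamping applies)
	vttbr_baddr_mask(40, 3) = GENMASK_ULL(47, 13),
	                          the same value the removed fixed VTTBR_BADDR_MASK expanded to.
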
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 177b851ca6d9..ff35ac1258eb 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -25,6 +25,9 @@
25#define CurrentEL_EL1 (1 << 2) 25#define CurrentEL_EL1 (1 << 2)
26#define CurrentEL_EL2 (2 << 2) 26#define CurrentEL_EL2 (2 << 2)
27 27
28/* Additional SPSR bits not exposed in the UABI */
29#define PSR_IL_BIT (1 << 20)
30
28/* AArch32-specific ptrace requests */ 31/* AArch32-specific ptrace requests */
29#define COMPAT_PTRACE_GETREGS 12 32#define COMPAT_PTRACE_GETREGS 12
30#define COMPAT_PTRACE_SETREGS 13 33#define COMPAT_PTRACE_SETREGS 13
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
deleted file mode 100644
index 2656a0fd05a6..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
18#define __ARM64_S2_PGTABLE_NOPMD_H_
19
20#include <asm/stage2_pgtable-nopud.h>
21
22#define __S2_PGTABLE_PMD_FOLDED
23
24#define S2_PMD_SHIFT S2_PUD_SHIFT
25#define S2_PTRS_PER_PMD 1
26#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
27#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
28
29#define stage2_pud_none(pud) (0)
30#define stage2_pud_present(pud) (1)
31#define stage2_pud_clear(pud) do { } while (0)
32#define stage2_pud_populate(pud, pmd) do { } while (0)
33#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
34
35#define stage2_pmd_free(pmd) do { } while (0)
36
37#define stage2_pmd_addr_end(addr, end) (end)
38
39#define stage2_pud_huge(pud) (0)
40#define stage2_pmd_table_empty(pmdp) (0)
41
42#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
deleted file mode 100644
index 5ee87b54ebf3..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopud.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
18#define __ARM64_S2_PGTABLE_NOPUD_H_
19
20#define __S2_PGTABLE_PUD_FOLDED
21
22#define S2_PUD_SHIFT S2_PGDIR_SHIFT
23#define S2_PTRS_PER_PUD 1
24#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
25#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
26
27#define stage2_pgd_none(pgd) (0)
28#define stage2_pgd_present(pgd) (1)
29#define stage2_pgd_clear(pgd) do { } while (0)
30#define stage2_pgd_populate(pgd, pud) do { } while (0)
31
32#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
33
34#define stage2_pud_free(x) do { } while (0)
35
36#define stage2_pud_addr_end(addr, end) (end)
37#define stage2_pud_table_empty(pmdp) (0)
38
39#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 8b68099348e5..d352f6df8d2c 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -19,9 +19,17 @@
19#ifndef __ARM64_S2_PGTABLE_H_ 19#ifndef __ARM64_S2_PGTABLE_H_
20#define __ARM64_S2_PGTABLE_H_ 20#define __ARM64_S2_PGTABLE_H_
21 21
22#include <linux/hugetlb.h>
22#include <asm/pgtable.h> 23#include <asm/pgtable.h>
23 24
24/* 25/*
26 * PGDIR_SHIFT determines the size a top-level page table entry can map
27 * and depends on the number of levels in the page table. Compute the
28 * PGDIR_SHIFT for a given number of levels.
29 */
30#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
31
32/*
25 * The hardware supports concatenation of up to 16 tables at stage2 entry level 33 * The hardware supports concatenation of up to 16 tables at stage2 entry level
26 * and we use the feature whenever possible. 34 * and we use the feature whenever possible.
27 * 35 *
@@ -29,112 +37,208 @@
29 * On arm64, the smallest PAGE_SIZE supported is 4k, which means 37 * On arm64, the smallest PAGE_SIZE supported is 4k, which means
30 * (PAGE_SHIFT - 3) > 4 holds for all page sizes. 38 * (PAGE_SHIFT - 3) > 4 holds for all page sizes.
31 * This implies, the total number of page table levels at stage2 expected 39 * This implies, the total number of page table levels at stage2 expected
32 * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4) 40 * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
33 * in normal translations(e.g, stage1), since we cannot have another level in 41 * in normal translations(e.g, stage1), since we cannot have another level in
34 * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4). 42 * the range (IPA_SHIFT, IPA_SHIFT - 4).
35 */ 43 */
36#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4) 44#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
45#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
37 46
38/* 47/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
39 * With all the supported VA_BITs and 40bit guest IPA, the following condition 48#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
40 * is always true: 49#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
41 * 50#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
42 * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
43 *
44 * We base our stage-2 page table walker helpers on this assumption and
45 * fall back to using the host version of the helper wherever possible.
46 * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
47 * to using the host version, since it is guaranteed it is not folded at host.
48 *
49 * If the condition breaks in the future, we can rearrange the host level
50 * definitions and reuse them for stage2. Till then...
51 */
52#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
53#error "Unsupported combination of guest IPA and host VA_BITS."
54#endif
55
56/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
57#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
58#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
59#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
60 51
61/* 52/*
62 * The number of PTRS across all concatenated stage2 tables given by the 53 * The number of PTRS across all concatenated stage2 tables given by the
63 * number of bits resolved at the initial level. 54 * number of bits resolved at the initial level.
55 * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
56 * in which case, stage2_pgd_ptrs will have one entry.
64 */ 57 */
65#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT)) 58#define pgd_ptrs_shift(ipa, pgdir_shift) \
59 ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
60#define __s2_pgd_ptrs(ipa, lvls) \
61 (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
62#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
63
64#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
65#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
66 66
67/* 67/*
68 * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation 68 * kvm_mmmu_cache_min_pages() is the number of pages required to install
69 * levels in addition to the PGD. 69 * a stage-2 translation. We pre-allocate the entry level page table at
70 * the VM creation.
70 */ 71 */
71#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1) 72#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
72 73
73 74/* Stage2 PUD definitions when the level is present */
74#if STAGE2_PGTABLE_LEVELS > 3 75static inline bool kvm_stage2_has_pud(struct kvm *kvm)
76{
77 return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3);
78}
75 79
76#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) 80#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
77#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) 81#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT)
78#define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) 82#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
79 83
80#define stage2_pgd_none(pgd) pgd_none(pgd) 84static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
81#define stage2_pgd_clear(pgd) pgd_clear(pgd) 85{
82#define stage2_pgd_present(pgd) pgd_present(pgd) 86 if (kvm_stage2_has_pud(kvm))
83#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) 87 return pgd_none(pgd);
84#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) 88 else
85#define stage2_pud_free(pud) pud_free(NULL, pud) 89 return 0;
90}
86 91
87#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp) 92static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
93{
94 if (kvm_stage2_has_pud(kvm))
95 pgd_clear(pgdp);
96}
88 97
89static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end) 98static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
90{ 99{
91 phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; 100 if (kvm_stage2_has_pud(kvm))
101 return pgd_present(pgd);
102 else
103 return 1;
104}
92 105
93 return (boundary - 1 < end - 1) ? boundary : end; 106static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
107{
108 if (kvm_stage2_has_pud(kvm))
109 pgd_populate(NULL, pgd, pud);
110}
111
112static inline pud_t *stage2_pud_offset(struct kvm *kvm,
113 pgd_t *pgd, unsigned long address)
114{
115 if (kvm_stage2_has_pud(kvm))
116 return pud_offset(pgd, address);
117 else
118 return (pud_t *)pgd;
94} 119}
95 120
96#endif /* STAGE2_PGTABLE_LEVELS > 3 */ 121static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
122{
123 if (kvm_stage2_has_pud(kvm))
124 pud_free(NULL, pud);
125}
97 126
127static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
128{
129 if (kvm_stage2_has_pud(kvm))
130 return kvm_page_empty(pudp);
131 else
132 return false;
133}
98 134
99#if STAGE2_PGTABLE_LEVELS > 2 135static inline phys_addr_t
136stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
137{
138 if (kvm_stage2_has_pud(kvm)) {
139 phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
140
141 return (boundary - 1 < end - 1) ? boundary : end;
142 } else {
143 return end;
144 }
145}
146
147/* Stage2 PMD definitions when the level is present */
148static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
149{
150 return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2);
151}
100 152
101#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) 153#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
102#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT) 154#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
103#define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) 155#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
104 156
105#define stage2_pud_none(pud) pud_none(pud) 157static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
106#define stage2_pud_clear(pud) pud_clear(pud) 158{
107#define stage2_pud_present(pud) pud_present(pud) 159 if (kvm_stage2_has_pmd(kvm))
108#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) 160 return pud_none(pud);
109#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) 161 else
110#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) 162 return 0;
163}
164
165static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
166{
167 if (kvm_stage2_has_pmd(kvm))
168 pud_clear(pud);
169}
111 170
112#define stage2_pud_huge(pud) pud_huge(pud) 171static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
113#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 172{
173 if (kvm_stage2_has_pmd(kvm))
174 return pud_present(pud);
175 else
176 return 1;
177}
114 178
115static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) 179static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
116{ 180{
117 phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; 181 if (kvm_stage2_has_pmd(kvm))
182 pud_populate(NULL, pud, pmd);
183}
118 184
119 return (boundary - 1 < end - 1) ? boundary : end; 185static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
186 pud_t *pud, unsigned long address)
187{
188 if (kvm_stage2_has_pmd(kvm))
189 return pmd_offset(pud, address);
190 else
191 return (pmd_t *)pud;
120} 192}
121 193
122#endif /* STAGE2_PGTABLE_LEVELS > 2 */ 194static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
195{
196 if (kvm_stage2_has_pmd(kvm))
197 pmd_free(NULL, pmd);
198}
199
200static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
201{
202 if (kvm_stage2_has_pmd(kvm))
203 return pud_huge(pud);
204 else
205 return 0;
206}
207
208static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
209{
210 if (kvm_stage2_has_pmd(kvm))
211 return kvm_page_empty(pmdp);
212 else
213 return 0;
214}
123 215
124#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) 216static inline phys_addr_t
217stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
218{
219 if (kvm_stage2_has_pmd(kvm)) {
220 phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
125 221
126#if STAGE2_PGTABLE_LEVELS == 2 222 return (boundary - 1 < end - 1) ? boundary : end;
127#include <asm/stage2_pgtable-nopmd.h> 223 } else {
128#elif STAGE2_PGTABLE_LEVELS == 3 224 return end;
129#include <asm/stage2_pgtable-nopud.h> 225 }
130#endif 226}
131 227
228static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
229{
230 return kvm_page_empty(ptep);
231}
132 232
133#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) 233static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
234{
235 return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1));
236}
134 237
135static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) 238static inline phys_addr_t
239stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
136{ 240{
137 phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK; 241 phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
138 242
139 return (boundary - 1 < end - 1) ? boundary : end; 243 return (boundary - 1 < end - 1) ? boundary : end;
140} 244}
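
To make the entry-level sizing concrete, take the same default again (40-bit IPA, 4K pages, 3 levels) and assume the usual ARM64_HW_PGTABLE_LEVEL_SHIFT definition from pgtable-hwdef.h:

	pt_levels_pgdir_shift(3)   = ARM64_HW_PGTABLE_LEVEL_SHIFT(1) = 30
	stage2_pgd_ptrs(kvm)       = 1 << (40 - 30) = 1024 entries
	stage2_pgd_size(kvm)       = 1024 * sizeof(pgd_t) = 8KB, i.e. two concatenated 4K
	                             tables at the entry level, matching the kvm_arm.h
	                             comment about concatenating two first level tables
	kvm_mmu_cache_min_pages()  = 3 - 1 = 2, as on 32-bit ARM.
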
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 07256b08226c..a74f84d09412 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -338,15 +338,15 @@ int __attribute_const__ kvm_target_cpu(void)
338 return KVM_ARM_TARGET_CORTEX_A53; 338 return KVM_ARM_TARGET_CORTEX_A53;
339 case ARM_CPU_PART_CORTEX_A57: 339 case ARM_CPU_PART_CORTEX_A57:
340 return KVM_ARM_TARGET_CORTEX_A57; 340 return KVM_ARM_TARGET_CORTEX_A57;
341 }; 341 }
342 break; 342 break;
343 case ARM_CPU_IMP_APM: 343 case ARM_CPU_IMP_APM:
344 switch (part_number) { 344 switch (part_number) {
345 case APM_CPU_PART_POTENZA: 345 case APM_CPU_PART_POTENZA:
346 return KVM_ARM_TARGET_XGENE_POTENZA; 346 return KVM_ARM_TARGET_XGENE_POTENZA;
347 }; 347 }
348 break; 348 break;
349 }; 349 }
350 350
351 /* Return a default generic target */ 351 /* Return a default generic target */
352 return KVM_ARM_TARGET_GENERIC_V8; 352 return KVM_ARM_TARGET_GENERIC_V8;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e5e741bfffe1..35a81bebd02b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
284 */ 284 */
285 run->exit_reason = KVM_EXIT_FAIL_ENTRY; 285 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
286 return 0; 286 return 0;
287 case ARM_EXCEPTION_IL:
288 /*
289 * We attempted an illegal exception return. Guest state must
290 * have been corrupted somehow. Give up.
291 */
292 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
293 return -EINVAL;
287 default: 294 default:
288 kvm_pr_unimpl("Unsupported exception type: %d", 295 kvm_pr_unimpl("Unsupported exception type: %d",
289 exception_index); 296 exception_index);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 2fabc2dc1966..82d1904328ad 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
19obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o 19obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
20obj-$(CONFIG_KVM_ARM_HOST) += tlb.o 20obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
21obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o 21obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
22obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
23 22
24# KVM code is run at a different exception code with a different map, so 23# KVM code is run at a different exception code with a different map, so
25# compiler instrumentation that inserts callbacks or checks into the code may 24# compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 24b4fbafe3e4..b1f14f736962 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -162,6 +162,20 @@ el1_error:
162 mov x0, #ARM_EXCEPTION_EL1_SERROR 162 mov x0, #ARM_EXCEPTION_EL1_SERROR
163 b __guest_exit 163 b __guest_exit
164 164
165el2_sync:
166 /* Check for illegal exception return, otherwise panic */
167 mrs x0, spsr_el2
168
169 /* if this was something else, then panic! */
170 tst x0, #PSR_IL_BIT
171 b.eq __hyp_panic
172
173 /* Let's attempt a recovery from the illegal exception return */
174 get_vcpu_ptr x1, x0
175 mov x0, #ARM_EXCEPTION_IL
176 b __guest_exit
177
178
165el2_error: 179el2_error:
166 ldp x0, x1, [sp], #16 180 ldp x0, x1, [sp], #16
167 181
@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector)
240 invalid_vect el2t_fiq_invalid // FIQ EL2t 254 invalid_vect el2t_fiq_invalid // FIQ EL2t
241 invalid_vect el2t_error_invalid // Error EL2t 255 invalid_vect el2t_error_invalid // Error EL2t
242 256
243 invalid_vect el2h_sync_invalid // Synchronous EL2h 257 valid_vect el2_sync // Synchronous EL2h
244 invalid_vect el2h_irq_invalid // IRQ EL2h 258 invalid_vect el2h_irq_invalid // IRQ EL2h
245 invalid_vect el2h_fiq_invalid // FIQ EL2h 259 invalid_vect el2h_fiq_invalid // FIQ EL2h
246 valid_vect el2_error // Error EL2h 260 valid_vect el2_error // Error EL2h
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
deleted file mode 100644
index 603e1ee83e89..000000000000
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ /dev/null
@@ -1,90 +0,0 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/types.h>
19#include <asm/kvm_arm.h>
20#include <asm/kvm_asm.h>
21#include <asm/kvm_hyp.h>
22
23u32 __hyp_text __init_stage2_translation(void)
24{
25 u64 val = VTCR_EL2_FLAGS;
26 u64 parange;
27 u64 tmp;
28
29 /*
30 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
31 * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
32 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
33 */
34 parange = read_sysreg(id_aa64mmfr0_el1) & 7;
35 if (parange > ID_AA64MMFR0_PARANGE_MAX)
36 parange = ID_AA64MMFR0_PARANGE_MAX;
37 val |= parange << 16;
38
39 /* Compute the actual PARange... */
40 switch (parange) {
41 case 0:
42 parange = 32;
43 break;
44 case 1:
45 parange = 36;
46 break;
47 case 2:
48 parange = 40;
49 break;
50 case 3:
51 parange = 42;
52 break;
53 case 4:
54 parange = 44;
55 break;
56 case 5:
57 default:
58 parange = 48;
59 break;
60 }
61
62 /*
63 * ... and clamp it to 40 bits, unless we have some braindead
64 * HW that implements less than that. In all cases, we'll
65 * return that value for the rest of the kernel to decide what
66 * to do.
67 */
68 val |= 64 - (parange > 40 ? 40 : parange);
69
70 /*
71 * Check the availability of Hardware Access Flag / Dirty Bit
72 * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
73 */
74 tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
75 if (tmp)
76 val |= VTCR_EL2_HA;
77
78 /*
79 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
80 * bit in VTCR_EL2.
81 */
82 tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
83 val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
84 VTCR_EL2_VS_16BIT :
85 VTCR_EL2_VS_8BIT;
86
87 write_sysreg(val, vtcr_el2);
88
89 return parange;
90}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ca46153d7915..7cc175c88a37 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void)
198 198
199static void __hyp_text __activate_vm(struct kvm *kvm) 199static void __hyp_text __activate_vm(struct kvm *kvm)
200{ 200{
201 write_sysreg(kvm->arch.vttbr, vttbr_el2); 201 __load_guest_stage2(kvm);
202} 202}
203 203
204static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) 204static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
@@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
263 return false; /* Translation failed, back to guest */ 263 return false; /* Translation failed, back to guest */
264 264
265 /* Convert PAR to HPFAR format */ 265 /* Convert PAR to HPFAR format */
266 *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4; 266 *hpfar = PAR_TO_HPFAR(tmp);
267 return true; 267 return true;
268} 268}
269 269
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 9ce223944983..8dc285318204 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
152static void __hyp_text 152static void __hyp_text
153__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) 153__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
154{ 154{
155 u64 pstate = ctxt->gp_regs.regs.pstate;
156 u64 mode = pstate & PSR_AA32_MODE_MASK;
157
158 /*
159 * Safety check to ensure we're setting the CPU up to enter the guest
160 * in a less privileged mode.
161 *
162 * If we are attempting a return to EL2 or higher in AArch64 state,
163 * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
164 * we'll take an illegal exception state exception immediately after
165 * the ERET to the guest. Attempts to return to AArch32 Hyp will
166 * result in an illegal exception return because EL2's execution state
167 * is determined by SCR_EL3.RW.
168 */
169 if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
170 pstate = PSR_MODE_EL2h | PSR_IL_BIT;
171
155 write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); 172 write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
156 write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); 173 write_sysreg_el2(pstate, spsr);
157 174
158 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) 175 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
159 write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); 176 write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 131c7772703c..4dbd9c69a96d 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
30 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so 30 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
31 * let's flip TGE before executing the TLB operation. 31 * let's flip TGE before executing the TLB operation.
32 */ 32 */
33 write_sysreg(kvm->arch.vttbr, vttbr_el2); 33 __load_guest_stage2(kvm);
34 val = read_sysreg(hcr_el2); 34 val = read_sysreg(hcr_el2);
35 val &= ~HCR_TGE; 35 val &= ~HCR_TGE;
36 write_sysreg(val, hcr_el2); 36 write_sysreg(val, hcr_el2);
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
39 39
40static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) 40static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
41{ 41{
42 write_sysreg(kvm->arch.vttbr, vttbr_el2); 42 __load_guest_stage2(kvm);
43 isb(); 43 isb();
44} 44}
45 45
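
Both TLB paths, like __activate_vm() earlier, now call __load_guest_stage2() instead of writing VTTBR_EL2 directly. The helper is defined in the patched asm/kvm_mmu.h (not shown in this excerpt); with a per-VM VTCR it is expected to look roughly like the sketch below, programming VTCR_EL2 alongside VTTBR_EL2 so the stage-2 configuration always matches the VM being entered:

/* Hedged sketch only; assumes the per-VM vtcr added by this series and
 * the existing vttbr field in struct kvm_arch. */
static inline void __load_guest_stage2(struct kvm *kvm)
{
	write_sysreg(kvm->arch.vtcr, vtcr_el2);
	write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
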
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e37c78bbe1ca..b72a3dd56204 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -26,6 +26,7 @@
26 26
27#include <kvm/arm_arch_timer.h> 27#include <kvm/arm_arch_timer.h>
28 28
29#include <asm/cpufeature.h>
29#include <asm/cputype.h> 30#include <asm/cputype.h>
30#include <asm/ptrace.h> 31#include <asm/ptrace.h>
31#include <asm/kvm_arm.h> 32#include <asm/kvm_arm.h>
@@ -33,6 +34,9 @@
33#include <asm/kvm_coproc.h> 34#include <asm/kvm_coproc.h>
34#include <asm/kvm_mmu.h> 35#include <asm/kvm_mmu.h>
35 36
37/* Maximum phys_shift supported for any VM on this host */
38static u32 kvm_ipa_limit;
39
36/* 40/*
37 * ARMv8 Reset Values 41 * ARMv8 Reset Values
38 */ 42 */
@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void)
55} 59}
56 60
57/** 61/**
58 * kvm_arch_dev_ioctl_check_extension 62 * kvm_arch_vm_ioctl_check_extension
59 * 63 *
60 * We currently assume that the number of HW registers is uniform 64 * We currently assume that the number of HW registers is uniform
61 * across all CPUs (see cpuinfo_sanity_check). 65 * across all CPUs (see cpuinfo_sanity_check).
62 */ 66 */
63int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) 67int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
64{ 68{
65 int r; 69 int r;
66 70
@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
82 break; 86 break;
83 case KVM_CAP_SET_GUEST_DEBUG: 87 case KVM_CAP_SET_GUEST_DEBUG:
84 case KVM_CAP_VCPU_ATTRIBUTES: 88 case KVM_CAP_VCPU_ATTRIBUTES:
85 case KVM_CAP_VCPU_EVENTS:
86 r = 1; 89 r = 1;
87 break; 90 break;
91 case KVM_CAP_ARM_VM_IPA_SIZE:
92 r = kvm_ipa_limit;
93 break;
88 default: 94 default:
89 r = 0; 95 r = 0;
90 } 96 }
@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
133 /* Reset timer */ 139 /* Reset timer */
134 return kvm_timer_vcpu_reset(vcpu); 140 return kvm_timer_vcpu_reset(vcpu);
135} 141}
142
143void kvm_set_ipa_limit(void)
144{
145 unsigned int ipa_max, pa_max, va_max, parange;
146
147 parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
148 pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
149
150 /* Clamp the IPA limit to the PA size supported by the kernel */
151 ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
152 /*
153 * Since our stage2 table is dependent on the stage1 page table code,
154 * we must always honor the following condition:
155 *
156 * Number of levels in Stage1 >= Number of levels in Stage2.
157 *
158 * So clamp the ipa limit further down to limit the number of levels.
 159 * Since we can concatenate up to 16 tables at the entry level, we could
 160 * go up to 4 bits above the maximum VA addressable with the current
161 * number of levels.
162 */
163 va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
164 va_max += 4;
165
166 if (va_max < ipa_max)
167 ipa_max = va_max;
168
169 /*
170 * If the final limit is lower than the real physical address
171 * limit of the CPUs, report the reason.
172 */
173 if (ipa_max < pa_max)
174 pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
175 (va_max < pa_max) ? "Virtual" : "Physical");
176
177 WARN(ipa_max < KVM_PHYS_SHIFT,
178 "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
179 kvm_ipa_limit = ipa_max;
180 kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
181}
182
183/*
184 * Configure the VTCR_EL2 for this VM. The VTCR value is common
185 * across all the physical CPUs on the system. We use system wide
186 * sanitised values to fill in different fields, except for Hardware
187 * Management of Access Flags. HA Flag is set unconditionally on
188 * all CPUs, as it is safe to run with or without the feature and
189 * the bit is RES0 on CPUs that don't support it.
190 */
191int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
192{
193 u64 vtcr = VTCR_EL2_FLAGS;
194 u32 parange, phys_shift;
195 u8 lvls;
196
197 if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
198 return -EINVAL;
199
200 phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
201 if (phys_shift) {
202 if (phys_shift > kvm_ipa_limit ||
203 phys_shift < 32)
204 return -EINVAL;
205 } else {
206 phys_shift = KVM_PHYS_SHIFT;
207 }
208
209 parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
210 if (parange > ID_AA64MMFR0_PARANGE_MAX)
211 parange = ID_AA64MMFR0_PARANGE_MAX;
212 vtcr |= parange << VTCR_EL2_PS_SHIFT;
213
214 vtcr |= VTCR_EL2_T0SZ(phys_shift);
215 /*
 216 * Use a minimum of 2 page table levels to prevent splitting
217 * host PMD huge pages at stage2.
218 */
219 lvls = stage2_pgtable_levels(phys_shift);
220 if (lvls < 2)
221 lvls = 2;
222 vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
223
224 /*
225 * Enable the Hardware Access Flag management, unconditionally
 226 * on all CPUs. The feature is RES0 on CPUs without the support
227 * and must be ignored by the CPUs.
228 */
229 vtcr |= VTCR_EL2_HA;
230
231 /* Set the vmid bits */
232 vtcr |= (kvm_get_vmid_bits() == 16) ?
233 VTCR_EL2_VS_16BIT :
234 VTCR_EL2_VS_8BIT;
235 kvm->arch.vtcr = vtcr;
236 return 0;
237}
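
A worked example of the kvm_set_ipa_limit() clamping above, assuming a 4K-page kernel with 48-bit VAs (so PGDIR_SHIFT = 39) and the pre-52-bit-PA value of PHYS_MASK_SHIFT = 48:

	va_max  = PGDIR_SHIFT + PAGE_SHIFT - 3 + 4 = 39 + 12 - 3 + 4 = 52
	ipa_max = min(pa_max, PHYS_MASK_SHIFT)     = min(52, 48)     = 48

On a CPU whose PARange reports 52 bits the limit therefore ends up at 48, the "kernel Physical Address limit" message is printed, and KVM_CAP_ARM_VM_IPA_SIZE (handled earlier in this file) returns 48.
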
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..6390bd8c141b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,6 +14,16 @@
14#ifndef _ASM_X86_FIXMAP_H 14#ifndef _ASM_X86_FIXMAP_H
15#define _ASM_X86_FIXMAP_H 15#define _ASM_X86_FIXMAP_H
16 16
17/*
18 * Exposed to assembly code for setting up initial page tables. Cannot be
19 * calculated in assembly code (fixmap entries are an enum), but is sanity
20 * checked in the actual fixmap C code to make sure that the fixmap is
21 * covered fully.
22 */
23#define FIXMAP_PMD_NUM 2
24/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
25#define FIXMAP_PMD_TOP 507
26
17#ifndef __ASSEMBLY__ 27#ifndef __ASSEMBLY__
18#include <linux/kernel.h> 28#include <linux/kernel.h>
19#include <asm/acpi.h> 29#include <asm/acpi.h>
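
With FIXMAP_PMD_NUM = 2 the statically reserved fixmap covers FIXMAP_PMD_NUM * PTRS_PER_PTE * PAGE_SIZE = 2 * 512 * 4 KiB = 4 MiB of virtual space, growing downwards from PMD slot FIXMAP_PMD_TOP (507) of level2_fixmap_pgt. The BUILD_BUG_ON added to __native_set_fixmap() further below verifies that __end_of_permanent_fixed_addresses actually fits in that many PTEs.
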
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
48 48
49/* Architecture __weak replacement functions */ 49/* Architecture __weak replacement functions */
50void __init mem_encrypt_init(void); 50void __init mem_encrypt_init(void);
51void __init mem_encrypt_free_decrypted_mem(void);
51 52
52bool sme_active(void); 53bool sme_active(void);
53bool sev_active(void); 54bool sev_active(void);
54 55
56#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
57
55#else /* !CONFIG_AMD_MEM_ENCRYPT */ 58#else /* !CONFIG_AMD_MEM_ENCRYPT */
56 59
57#define sme_me_mask 0ULL 60#define sme_me_mask 0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
77static inline int __init 80static inline int __init
78early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } 81early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
79 82
83#define __bss_decrypted
84
80#endif /* CONFIG_AMD_MEM_ENCRYPT */ 85#endif /* CONFIG_AMD_MEM_ENCRYPT */
81 86
82/* 87/*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
88#define __sme_pa(x) (__pa(x) | sme_me_mask) 93#define __sme_pa(x) (__pa(x) | sme_me_mask)
89#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) 94#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
90 95
96extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
97
91#endif /* __ASSEMBLY__ */ 98#endif /* __ASSEMBLY__ */
92 99
93#endif /* __X86_MEM_ENCRYPT_H__ */ 100#endif /* __X86_MEM_ENCRYPT_H__ */
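
Usage of the new attribute is straightforward: marking a variable __bss_decrypted places it in the .bss..decrypted section defined by the vmlinux.lds.S change further below, so it stays zero-initialised but is mapped unencrypted under SME/SEV. The kvmclock change in this same series is the first user, for data shared with the hypervisor:

static struct pvclock_wall_clock wall_clock __bss_decrypted;
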
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce2b59047cb8..9c85b54bf03c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,6 +14,7 @@
14#include <asm/processor.h> 14#include <asm/processor.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/threads.h> 16#include <linux/threads.h>
17#include <asm/fixmap.h>
17 18
18extern p4d_t level4_kernel_pgt[512]; 19extern p4d_t level4_kernel_pgt[512];
19extern p4d_t level4_ident_pgt[512]; 20extern p4d_t level4_ident_pgt[512];
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
22extern pmd_t level2_kernel_pgt[512]; 23extern pmd_t level2_kernel_pgt[512];
23extern pmd_t level2_fixmap_pgt[512]; 24extern pmd_t level2_fixmap_pgt[512];
24extern pmd_t level2_ident_pgt[512]; 25extern pmd_t level2_ident_pgt[512];
25extern pte_t level1_fixmap_pgt[512]; 26extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
26extern pgd_t init_top_pgt[]; 27extern pgd_t init_top_pgt[];
27 28
28#define swapper_pg_dir init_top_pgt 29#define swapper_pg_dir init_top_pgt
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 4e588f36228f..285eb3ec4200 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
382 e <= QOS_L3_MBM_LOCAL_EVENT_ID); 382 e <= QOS_L3_MBM_LOCAL_EVENT_ID);
383} 383}
384 384
385struct rdt_parse_data {
386 struct rdtgroup *rdtgrp;
387 char *buf;
388};
389
385/** 390/**
386 * struct rdt_resource - attributes of an RDT resource 391 * struct rdt_resource - attributes of an RDT resource
387 * @rid: The index of the resource 392 * @rid: The index of the resource
@@ -423,16 +428,19 @@ struct rdt_resource {
423 struct rdt_cache cache; 428 struct rdt_cache cache;
424 struct rdt_membw membw; 429 struct rdt_membw membw;
425 const char *format_str; 430 const char *format_str;
426 int (*parse_ctrlval) (void *data, struct rdt_resource *r, 431 int (*parse_ctrlval)(struct rdt_parse_data *data,
427 struct rdt_domain *d); 432 struct rdt_resource *r,
433 struct rdt_domain *d);
428 struct list_head evt_list; 434 struct list_head evt_list;
429 int num_rmid; 435 int num_rmid;
430 unsigned int mon_scale; 436 unsigned int mon_scale;
431 unsigned long fflags; 437 unsigned long fflags;
432}; 438};
433 439
434int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d); 440int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
435int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d); 441 struct rdt_domain *d);
442int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
443 struct rdt_domain *d);
436 444
437extern struct mutex rdtgroup_mutex; 445extern struct mutex rdtgroup_mutex;
438 446
@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
536void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); 544void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
537struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); 545struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
538int update_domains(struct rdt_resource *r, int closid); 546int update_domains(struct rdt_resource *r, int closid);
547int closids_supported(void);
539void closid_free(int closid); 548void closid_free(int closid);
540int alloc_rmid(void); 549int alloc_rmid(void);
541void free_rmid(u32 rmid); 550void free_rmid(u32 rmid);
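
With parse_ctrlval() now taking a typed struct rdt_parse_data instead of a void pointer, the call site in parse_line() (patched further below) fills the structure and invokes the per-resource callback along these lines (simplified sketch, error handling trimmed; dom is one "id=value" token of the schemata line):

	struct rdt_parse_data data;

	data.rdtgrp = rdtgrp;
	data.buf = dom;
	if (r->parse_ctrlval(&data, r, d))
		return -EINVAL;
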
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index af358ca05160..0f53049719cd 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
64 return true; 64 return true;
65} 65}
66 66
67int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d) 67int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
68 struct rdt_domain *d)
68{ 69{
69 unsigned long data; 70 unsigned long bw_val;
70 char *buf = _buf;
71 71
72 if (d->have_new_ctrl) { 72 if (d->have_new_ctrl) {
73 rdt_last_cmd_printf("duplicate domain %d\n", d->id); 73 rdt_last_cmd_printf("duplicate domain %d\n", d->id);
74 return -EINVAL; 74 return -EINVAL;
75 } 75 }
76 76
77 if (!bw_validate(buf, &data, r)) 77 if (!bw_validate(data->buf, &bw_val, r))
78 return -EINVAL; 78 return -EINVAL;
79 d->new_ctrl = data; 79 d->new_ctrl = bw_val;
80 d->have_new_ctrl = true; 80 d->have_new_ctrl = true;
81 81
82 return 0; 82 return 0;
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
123 return true; 123 return true;
124} 124}
125 125
126struct rdt_cbm_parse_data {
127 struct rdtgroup *rdtgrp;
128 char *buf;
129};
130
131/* 126/*
132 * Read one cache bit mask (hex). Check that it is valid for the current 127 * Read one cache bit mask (hex). Check that it is valid for the current
133 * resource type. 128 * resource type.
134 */ 129 */
135int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) 130int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
131 struct rdt_domain *d)
136{ 132{
137 struct rdt_cbm_parse_data *data = _data;
138 struct rdtgroup *rdtgrp = data->rdtgrp; 133 struct rdtgroup *rdtgrp = data->rdtgrp;
139 u32 cbm_val; 134 u32 cbm_val;
140 135
@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
195static int parse_line(char *line, struct rdt_resource *r, 190static int parse_line(char *line, struct rdt_resource *r,
196 struct rdtgroup *rdtgrp) 191 struct rdtgroup *rdtgrp)
197{ 192{
198 struct rdt_cbm_parse_data data; 193 struct rdt_parse_data data;
199 char *dom = NULL, *id; 194 char *dom = NULL, *id;
200 struct rdt_domain *d; 195 struct rdt_domain *d;
201 unsigned long dom_id; 196 unsigned long dom_id;
202 197
198 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
199 r->rid == RDT_RESOURCE_MBA) {
200 rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
201 return -EINVAL;
202 }
203
203next: 204next:
204 if (!line || line[0] == '\0') 205 if (!line || line[0] == '\0')
205 return 0; 206 return 0;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b799c00bef09..1b8e86a5d5e1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
97 * limited as the number of resources grows. 97 * limited as the number of resources grows.
98 */ 98 */
99static int closid_free_map; 99static int closid_free_map;
100static int closid_free_map_len;
101
102int closids_supported(void)
103{
104 return closid_free_map_len;
105}
100 106
101static void closid_init(void) 107static void closid_init(void)
102{ 108{
@@ -111,6 +117,7 @@ static void closid_init(void)
111 117
112 /* CLOSID 0 is always reserved for the default group */ 118 /* CLOSID 0 is always reserved for the default group */
113 closid_free_map &= ~1; 119 closid_free_map &= ~1;
120 closid_free_map_len = rdt_min_closid;
114} 121}
115 122
116static int closid_alloc(void) 123static int closid_alloc(void)
@@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
802 sw_shareable = 0; 809 sw_shareable = 0;
803 exclusive = 0; 810 exclusive = 0;
804 seq_printf(seq, "%d=", dom->id); 811 seq_printf(seq, "%d=", dom->id);
805 for (i = 0; i < r->num_closid; i++, ctrl++) { 812 for (i = 0; i < closids_supported(); i++, ctrl++) {
806 if (!closid_allocated(i)) 813 if (!closid_allocated(i))
807 continue; 814 continue;
808 mode = rdtgroup_mode_by_closid(i); 815 mode = rdtgroup_mode_by_closid(i);
@@ -989,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
989 996
990 /* Check for overlap with other resource groups */ 997 /* Check for overlap with other resource groups */
991 ctrl = d->ctrl_val; 998 ctrl = d->ctrl_val;
992 for (i = 0; i < r->num_closid; i++, ctrl++) { 999 for (i = 0; i < closids_supported(); i++, ctrl++) {
993 ctrl_b = (unsigned long *)ctrl; 1000 ctrl_b = (unsigned long *)ctrl;
994 mode = rdtgroup_mode_by_closid(i); 1001 mode = rdtgroup_mode_by_closid(i);
995 if (closid_allocated(i) && i != closid && 1002 if (closid_allocated(i) && i != closid &&
@@ -1024,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1024{ 1031{
1025 int closid = rdtgrp->closid; 1032 int closid = rdtgrp->closid;
1026 struct rdt_resource *r; 1033 struct rdt_resource *r;
1034 bool has_cache = false;
1027 struct rdt_domain *d; 1035 struct rdt_domain *d;
1028 1036
1029 for_each_alloc_enabled_rdt_resource(r) { 1037 for_each_alloc_enabled_rdt_resource(r) {
1038 if (r->rid == RDT_RESOURCE_MBA)
1039 continue;
1040 has_cache = true;
1030 list_for_each_entry(d, &r->domains, list) { 1041 list_for_each_entry(d, &r->domains, list) {
1031 if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], 1042 if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1032 rdtgrp->closid, false)) 1043 rdtgrp->closid, false)) {
1044 rdt_last_cmd_puts("schemata overlaps\n");
1033 return false; 1045 return false;
1046 }
1034 } 1047 }
1035 } 1048 }
1036 1049
1050 if (!has_cache) {
1051 rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
1052 return false;
1053 }
1054
1037 return true; 1055 return true;
1038} 1056}
1039 1057
@@ -1085,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1085 rdtgrp->mode = RDT_MODE_SHAREABLE; 1103 rdtgrp->mode = RDT_MODE_SHAREABLE;
1086 } else if (!strcmp(buf, "exclusive")) { 1104 } else if (!strcmp(buf, "exclusive")) {
1087 if (!rdtgroup_mode_test_exclusive(rdtgrp)) { 1105 if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1088 rdt_last_cmd_printf("schemata overlaps\n");
1089 ret = -EINVAL; 1106 ret = -EINVAL;
1090 goto out; 1107 goto out;
1091 } 1108 }
@@ -1155,8 +1172,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
1155 struct rdt_resource *r; 1172 struct rdt_resource *r;
1156 struct rdt_domain *d; 1173 struct rdt_domain *d;
1157 unsigned int size; 1174 unsigned int size;
1158 bool sep = false; 1175 bool sep;
1159 u32 cbm; 1176 u32 ctrl;
1160 1177
1161 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1178 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1162 if (!rdtgrp) { 1179 if (!rdtgrp) {
@@ -1174,6 +1191,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
1174 } 1191 }
1175 1192
1176 for_each_alloc_enabled_rdt_resource(r) { 1193 for_each_alloc_enabled_rdt_resource(r) {
1194 sep = false;
1177 seq_printf(s, "%*s:", max_name_width, r->name); 1195 seq_printf(s, "%*s:", max_name_width, r->name);
1178 list_for_each_entry(d, &r->domains, list) { 1196 list_for_each_entry(d, &r->domains, list) {
1179 if (sep) 1197 if (sep)
@@ -1181,8 +1199,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
1181 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1199 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1182 size = 0; 1200 size = 0;
1183 } else { 1201 } else {
1184 cbm = d->ctrl_val[rdtgrp->closid]; 1202 ctrl = (!is_mba_sc(r) ?
1185 size = rdtgroup_cbm_to_size(r, d, cbm); 1203 d->ctrl_val[rdtgrp->closid] :
1204 d->mbps_val[rdtgrp->closid]);
1205 if (r->rid == RDT_RESOURCE_MBA)
1206 size = ctrl;
1207 else
1208 size = rdtgroup_cbm_to_size(r, d, ctrl);
1186 } 1209 }
1187 seq_printf(s, "%d=%u", d->id, size); 1210 seq_printf(s, "%d=%u", d->id, size);
1188 sep = true; 1211 sep = true;
@@ -2336,12 +2359,18 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2336 u32 *ctrl; 2359 u32 *ctrl;
2337 2360
2338 for_each_alloc_enabled_rdt_resource(r) { 2361 for_each_alloc_enabled_rdt_resource(r) {
2362 /*
2363 * Only initialize default allocations for CBM cache
2364 * resources
2365 */
2366 if (r->rid == RDT_RESOURCE_MBA)
2367 continue;
2339 list_for_each_entry(d, &r->domains, list) { 2368 list_for_each_entry(d, &r->domains, list) {
2340 d->have_new_ctrl = false; 2369 d->have_new_ctrl = false;
2341 d->new_ctrl = r->cache.shareable_bits; 2370 d->new_ctrl = r->cache.shareable_bits;
2342 used_b = r->cache.shareable_bits; 2371 used_b = r->cache.shareable_bits;
2343 ctrl = d->ctrl_val; 2372 ctrl = d->ctrl_val;
2344 for (i = 0; i < r->num_closid; i++, ctrl++) { 2373 for (i = 0; i < closids_supported(); i++, ctrl++) {
2345 if (closid_allocated(i) && i != closid) { 2374 if (closid_allocated(i) && i != closid) {
2346 mode = rdtgroup_mode_by_closid(i); 2375 mode = rdtgroup_mode_by_closid(i);
2347 if (mode == RDT_MODE_PSEUDO_LOCKSETUP) 2376 if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
@@ -2373,6 +2402,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2373 } 2402 }
2374 2403
2375 for_each_alloc_enabled_rdt_resource(r) { 2404 for_each_alloc_enabled_rdt_resource(r) {
2405 /*
2406 * Only initialize default allocations for CBM cache
2407 * resources
2408 */
2409 if (r->rid == RDT_RESOURCE_MBA)
2410 continue;
2376 ret = update_domains(r, rdtgrp->closid); 2411 ret = update_domains(r, rdtgrp->closid);
2377 if (ret < 0) { 2412 if (ret < 0) {
2378 rdt_last_cmd_puts("failed to initialize allocations\n"); 2413 rdt_last_cmd_puts("failed to initialize allocations\n");
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..ddee1f0870c4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -35,6 +35,7 @@
35#include <asm/bootparam_utils.h> 35#include <asm/bootparam_utils.h>
36#include <asm/microcode.h> 36#include <asm/microcode.h>
37#include <asm/kasan.h> 37#include <asm/kasan.h>
38#include <asm/fixmap.h>
38 39
39/* 40/*
40 * Manage page tables very early on. 41 * Manage page tables very early on.
@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr)
112unsigned long __head __startup_64(unsigned long physaddr, 113unsigned long __head __startup_64(unsigned long physaddr,
113 struct boot_params *bp) 114 struct boot_params *bp)
114{ 115{
116 unsigned long vaddr, vaddr_end;
115 unsigned long load_delta, *p; 117 unsigned long load_delta, *p;
116 unsigned long pgtable_flags; 118 unsigned long pgtable_flags;
117 pgdval_t *pgd; 119 pgdval_t *pgd;
@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
165 pud[511] += load_delta; 167 pud[511] += load_delta;
166 168
167 pmd = fixup_pointer(level2_fixmap_pgt, physaddr); 169 pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
168 pmd[506] += load_delta; 170 for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
171 pmd[i] += load_delta;
169 172
170 /* 173 /*
171 * Set up the identity mapping for the switchover. These 174 * Set up the identity mapping for the switchover. These
@@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
235 sme_encrypt_kernel(bp); 238 sme_encrypt_kernel(bp);
236 239
237 /* 240 /*
241 * Clear the memory encryption mask from the .bss..decrypted section.
242 * The bss section will be memset to zero later in the initialization so
243 * there is no need to zero it after changing the memory encryption
244 * attribute.
245 */
246 if (mem_encrypt_active()) {
247 vaddr = (unsigned long)__start_bss_decrypted;
248 vaddr_end = (unsigned long)__end_bss_decrypted;
249 for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
250 i = pmd_index(vaddr);
251 pmd[i] -= sme_get_me_mask();
252 }
253 }
254
255 /*
238 * Return the SME encryption mask (if SME is active) to be used as a 256 * Return the SME encryption mask (if SME is active) to be used as a
239 * modifier for the initial pgdir entry programmed into CR3. 257 * modifier for the initial pgdir entry programmed into CR3.
240 */ 258 */
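
Because .bss..decrypted is PMD-aligned (see the vmlinux.lds.S change below), the loop added above walks it in PMD_SIZE steps (2 MiB with the usual x86-64 page tables) and subtracts the SME mask from each PMD entry, clearing the encryption bit. For illustration: a section spanning 4 MiB is covered by exactly two PMD entries, so the loop executes twice.
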
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a3618cf04cf6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,6 +24,7 @@
24#include "../entry/calling.h" 24#include "../entry/calling.h"
25#include <asm/export.h> 25#include <asm/export.h>
26#include <asm/nospec-branch.h> 26#include <asm/nospec-branch.h>
27#include <asm/fixmap.h>
27 28
28#ifdef CONFIG_PARAVIRT 29#ifdef CONFIG_PARAVIRT
29#include <asm/asm-offsets.h> 30#include <asm/asm-offsets.h>
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
445 KERNEL_IMAGE_SIZE/PMD_SIZE) 446 KERNEL_IMAGE_SIZE/PMD_SIZE)
446 447
447NEXT_PAGE(level2_fixmap_pgt) 448NEXT_PAGE(level2_fixmap_pgt)
448 .fill 506,8,0 449 .fill (512 - 4 - FIXMAP_PMD_NUM),8,0
449 .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 450 pgtno = 0
450 /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ 451 .rept (FIXMAP_PMD_NUM)
451 .fill 5,8,0 452 .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
453 + _PAGE_TABLE_NOENC;
454 pgtno = pgtno + 1
455 .endr
456 /* 6 MB reserved space + a 2MB hole */
457 .fill 4,8,0
452 458
453NEXT_PAGE(level1_fixmap_pgt) 459NEXT_PAGE(level1_fixmap_pgt)
460 .rept (FIXMAP_PMD_NUM)
454 .fill 512,8,0 461 .fill 512,8,0
462 .endr
455 463
456#undef PMDS 464#undef PMDS
457 465
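
With FIXMAP_PMD_NUM = 2 and FIXMAP_PMD_TOP = 507, the rewritten level2_fixmap_pgt above ends up laid out as: entries 0-505 empty (.fill 512 - 4 - 2), entries 506 and 507 pointing at the two consecutive level1_fixmap_pgt pages (the fixmap grows downwards from slot 507), and entries 508-511 left empty for the reserved space and 2 MB hole noted in the comment.
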
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..013fe3d21dbb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
28#include <linux/sched/clock.h> 28#include <linux/sched/clock.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/set_memory.h>
31 32
32#include <asm/hypervisor.h> 33#include <asm/hypervisor.h>
33#include <asm/mem_encrypt.h> 34#include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
61 (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) 62 (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
62 63
63static struct pvclock_vsyscall_time_info 64static struct pvclock_vsyscall_time_info
64 hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); 65 hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
65static struct pvclock_wall_clock wall_clock; 66static struct pvclock_wall_clock wall_clock __bss_decrypted;
66static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); 67static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
68static struct pvclock_vsyscall_time_info *hvclock_mem;
67 69
68static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) 70static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
69{ 71{
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
236 native_machine_shutdown(); 238 native_machine_shutdown();
237} 239}
238 240
241static void __init kvmclock_init_mem(void)
242{
243 unsigned long ncpus;
244 unsigned int order;
245 struct page *p;
246 int r;
247
248 if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
249 return;
250
251 ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
252 order = get_order(ncpus * sizeof(*hvclock_mem));
253
254 p = alloc_pages(GFP_KERNEL, order);
255 if (!p) {
256 pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
257 return;
258 }
259
260 hvclock_mem = page_address(p);
261
262 /*
 263 * hvclock is shared between the guest and the hypervisor, so it must
264 * be mapped decrypted.
265 */
266 if (sev_active()) {
267 r = set_memory_decrypted((unsigned long) hvclock_mem,
268 1UL << order);
269 if (r) {
270 __free_pages(p, order);
271 hvclock_mem = NULL;
272 pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
273 return;
274 }
275 }
276
277 memset(hvclock_mem, 0, PAGE_SIZE << order);
278}
279
239static int __init kvm_setup_vsyscall_timeinfo(void) 280static int __init kvm_setup_vsyscall_timeinfo(void)
240{ 281{
241#ifdef CONFIG_X86_64 282#ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
250 291
251 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; 292 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
252#endif 293#endif
294
295 kvmclock_init_mem();
296
253 return 0; 297 return 0;
254} 298}
255early_initcall(kvm_setup_vsyscall_timeinfo); 299early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
269 /* Use the static page for the first CPUs, allocate otherwise */ 313 /* Use the static page for the first CPUs, allocate otherwise */
270 if (cpu < HVC_BOOT_ARRAY_SIZE) 314 if (cpu < HVC_BOOT_ARRAY_SIZE)
271 p = &hv_clock_boot[cpu]; 315 p = &hv_clock_boot[cpu];
316 else if (hvclock_mem)
317 p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
272 else 318 else
273 p = kzalloc(sizeof(*p), GFP_KERNEL); 319 return -ENOMEM;
274 320
275 per_cpu(hv_clock_per_cpu, cpu) = p; 321 per_cpu(hv_clock_per_cpu, cpu) = p;
276 return p ? 0 : -ENOMEM; 322 return p ? 0 : -ENOMEM;
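
For scale, assuming the usual 64-byte struct pvclock_vsyscall_time_info: HVC_BOOT_ARRAY_SIZE = PAGE_SIZE / 64 = 64, so guests with up to 64 possible CPUs keep using the static __bss_decrypted page and kvmclock_init_mem() allocates nothing. A 256-vCPU guest needs (256 - 64) * 64 B = 12 KiB, i.e. an order-2 (16 KiB) allocation, which is additionally set decrypted when SEV is active because the hypervisor has to read and write it.
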
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..8dc69d82567e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
91 91
92 if (len < 5) { 92 if (len < 5) {
93#ifdef CONFIG_RETPOLINE 93#ifdef CONFIG_RETPOLINE
94 WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); 94 WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
95#endif 95#endif
96 return len; /* call too long for patch site */ 96 return len; /* call too long for patch site */
97 } 97 }
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
111 111
112 if (len < 5) { 112 if (len < 5) {
113#ifdef CONFIG_RETPOLINE 113#ifdef CONFIG_RETPOLINE
114 WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); 114 WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
115#endif 115#endif
116 return len; /* call too long for patch site */ 116 return len; /* call too long for patch site */
117 } 117 }
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
65#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); 65#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
66#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); 66#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
67 67
68/*
69 * This section contains data which will be mapped as decrypted. Memory
70 * encryption operates on a page basis. Make this section PMD-aligned
71 * to avoid splitting the pages while mapping the section early.
72 *
73 * Note: We use a separate section so that only this section gets
74 * decrypted to avoid exposing more than we wish.
75 */
76#define BSS_DECRYPTED \
77 . = ALIGN(PMD_SIZE); \
78 __start_bss_decrypted = .; \
79 *(.bss..decrypted); \
80 . = ALIGN(PAGE_SIZE); \
81 __start_bss_decrypted_unused = .; \
82 . = ALIGN(PMD_SIZE); \
83 __end_bss_decrypted = .; \
84
68#else 85#else
69 86
70#define X86_ALIGN_RODATA_BEGIN 87#define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
74 91
75#define ALIGN_ENTRY_TEXT_BEGIN 92#define ALIGN_ENTRY_TEXT_BEGIN
76#define ALIGN_ENTRY_TEXT_END 93#define ALIGN_ENTRY_TEXT_END
94#define BSS_DECRYPTED
77 95
78#endif 96#endif
79 97
@@ -355,6 +373,7 @@ SECTIONS
355 __bss_start = .; 373 __bss_start = .;
356 *(.bss..page_aligned) 374 *(.bss..page_aligned)
357 *(.bss) 375 *(.bss)
376 BSS_DECRYPTED
358 . = ALIGN(PAGE_SIZE); 377 . = ALIGN(PAGE_SIZE);
359 __bss_stop = .; 378 __bss_stop = .;
360 } 379 }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
815 set_memory_np_noalias(begin_ul, len_pages); 815 set_memory_np_noalias(begin_ul, len_pages);
816} 816}
817 817
818void __weak mem_encrypt_free_decrypted_mem(void) { }
819
818void __ref free_initmem(void) 820void __ref free_initmem(void)
819{ 821{
820 e820__reallocate_tables(); 822 e820__reallocate_tables();
821 823
824 mem_encrypt_free_decrypted_mem();
825
822 free_kernel_image_pages(&__init_begin, &__init_end); 826 free_kernel_image_pages(&__init_begin, &__init_end);
823} 827}
824 828
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
348EXPORT_SYMBOL(sev_active); 348EXPORT_SYMBOL(sev_active);
349 349
350/* Architecture __weak replacement functions */ 350/* Architecture __weak replacement functions */
351void __init mem_encrypt_free_decrypted_mem(void)
352{
353 unsigned long vaddr, vaddr_end, npages;
354 int r;
355
356 vaddr = (unsigned long)__start_bss_decrypted_unused;
357 vaddr_end = (unsigned long)__end_bss_decrypted;
358 npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
359
360 /*
361 * The unused memory range was mapped decrypted, change the encryption
362 * attribute from decrypted to encrypted before freeing it.
363 */
364 if (mem_encrypt_active()) {
365 r = set_memory_encrypted(vaddr, npages);
366 if (r) {
367 pr_warn("failed to free unused decrypted pages\n");
368 return;
369 }
370 }
371
372 free_init_pages("unused decrypted", vaddr, vaddr_end);
373}
374
351void __init mem_encrypt_init(void) 375void __init mem_encrypt_init(void)
352{ 376{
353 if (!sme_me_mask) 377 if (!sme_me_mask)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ae394552fb94..089e78c4effd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
637{ 637{
638 unsigned long address = __fix_to_virt(idx); 638 unsigned long address = __fix_to_virt(idx);
639 639
640#ifdef CONFIG_X86_64
641 /*
642 * Ensure that the static initial page tables are covering the
643 * fixmap completely.
644 */
645 BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
646 (FIXMAP_PMD_NUM * PTRS_PER_PTE));
647#endif
648
640 if (idx >= __end_of_fixed_addresses) { 649 if (idx >= __end_of_fixed_addresses) {
641 BUG(); 650 BUG();
642 return; 651 return;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1907 /* L3_k[511] -> level2_fixmap_pgt */ 1907 /* L3_k[511] -> level2_fixmap_pgt */
1908 convert_pfn_mfn(level3_kernel_pgt); 1908 convert_pfn_mfn(level3_kernel_pgt);
1909 1909
1910 /* L3_k[511][506] -> level1_fixmap_pgt */ 1910 /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
1911 convert_pfn_mfn(level2_fixmap_pgt); 1911 convert_pfn_mfn(level2_fixmap_pgt);
1912 1912
1913 /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 1913 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1952 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 1952 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1953 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1953 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1954 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1954 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1955 set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); 1955
1956 for (i = 0; i < FIXMAP_PMD_NUM; i++) {
1957 set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
1958 PAGE_KERNEL_RO);
1959 }
1956 1960
1957 /* Pin down new L4 */ 1961 /* Pin down new L4 */
1958 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, 1962 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 7d00d4ad44d4..95997e6c0696 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
478irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) 478irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
479{ 479{
480 int err, ret = IRQ_NONE; 480 int err, ret = IRQ_NONE;
481 struct pt_regs regs; 481 struct pt_regs regs = {0};
482 const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); 482 const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
483 uint8_t xenpmu_flags = get_xenpmu_flags(); 483 uint8_t xenpmu_flags = get_xenpmu_flags();
484 484
diff --git a/block/bio.c b/block/bio.c
index 8c680a776171..0093bed81c0e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1684,7 +1684,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
1684 const int sgrp = op_stat_group(req_op); 1684 const int sgrp = op_stat_group(req_op);
1685 int cpu = part_stat_lock(); 1685 int cpu = part_stat_lock();
1686 1686
1687 part_stat_add(cpu, part, ticks[sgrp], duration); 1687 part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
1688 part_round_stats(q, cpu, part); 1688 part_round_stats(q, cpu, part);
1689 part_dec_in_flight(q, part, op_is_write(req_op)); 1689 part_dec_in_flight(q, part, op_is_write(req_op));
1690 1690
diff --git a/block/blk-core.c b/block/blk-core.c
index 4dbc93f43b38..cff0a60ee200 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2733,17 +2733,15 @@ void blk_account_io_done(struct request *req, u64 now)
2733 * containing request is enough. 2733 * containing request is enough.
2734 */ 2734 */
2735 if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { 2735 if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
2736 unsigned long duration;
2737 const int sgrp = op_stat_group(req_op(req)); 2736 const int sgrp = op_stat_group(req_op(req));
2738 struct hd_struct *part; 2737 struct hd_struct *part;
2739 int cpu; 2738 int cpu;
2740 2739
2741 duration = nsecs_to_jiffies(now - req->start_time_ns);
2742 cpu = part_stat_lock(); 2740 cpu = part_stat_lock();
2743 part = req->part; 2741 part = req->part;
2744 2742
2745 part_stat_inc(cpu, part, ios[sgrp]); 2743 part_stat_inc(cpu, part, ios[sgrp]);
2746 part_stat_add(cpu, part, ticks[sgrp], duration); 2744 part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
2747 part_round_stats(req->q, cpu, part); 2745 part_round_stats(req->q, cpu, part);
2748 part_dec_in_flight(req->q, part, rq_data_dir(req)); 2746 part_dec_in_flight(req->q, part, rq_data_dir(req));
2749 2747
diff --git a/block/genhd.c b/block/genhd.c
index 8cc719a37b32..be5bab20b2ab 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1343,18 +1343,18 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1343 part_stat_read(hd, ios[STAT_READ]), 1343 part_stat_read(hd, ios[STAT_READ]),
1344 part_stat_read(hd, merges[STAT_READ]), 1344 part_stat_read(hd, merges[STAT_READ]),
1345 part_stat_read(hd, sectors[STAT_READ]), 1345 part_stat_read(hd, sectors[STAT_READ]),
1346 jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])), 1346 (unsigned int)part_stat_read_msecs(hd, STAT_READ),
1347 part_stat_read(hd, ios[STAT_WRITE]), 1347 part_stat_read(hd, ios[STAT_WRITE]),
1348 part_stat_read(hd, merges[STAT_WRITE]), 1348 part_stat_read(hd, merges[STAT_WRITE]),
1349 part_stat_read(hd, sectors[STAT_WRITE]), 1349 part_stat_read(hd, sectors[STAT_WRITE]),
1350 jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])), 1350 (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
1351 inflight[0], 1351 inflight[0],
1352 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1352 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1353 jiffies_to_msecs(part_stat_read(hd, time_in_queue)), 1353 jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
1354 part_stat_read(hd, ios[STAT_DISCARD]), 1354 part_stat_read(hd, ios[STAT_DISCARD]),
1355 part_stat_read(hd, merges[STAT_DISCARD]), 1355 part_stat_read(hd, merges[STAT_DISCARD]),
1356 part_stat_read(hd, sectors[STAT_DISCARD]), 1356 part_stat_read(hd, sectors[STAT_DISCARD]),
1357 jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD])) 1357 (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
1358 ); 1358 );
1359 } 1359 }
1360 disk_part_iter_exit(&piter); 1360 disk_part_iter_exit(&piter);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 5a8975a1201c..d3d14e81fb12 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -136,18 +136,18 @@ ssize_t part_stat_show(struct device *dev,
136 part_stat_read(p, ios[STAT_READ]), 136 part_stat_read(p, ios[STAT_READ]),
137 part_stat_read(p, merges[STAT_READ]), 137 part_stat_read(p, merges[STAT_READ]),
138 (unsigned long long)part_stat_read(p, sectors[STAT_READ]), 138 (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
139 jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])), 139 (unsigned int)part_stat_read_msecs(p, STAT_READ),
140 part_stat_read(p, ios[STAT_WRITE]), 140 part_stat_read(p, ios[STAT_WRITE]),
141 part_stat_read(p, merges[STAT_WRITE]), 141 part_stat_read(p, merges[STAT_WRITE]),
142 (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]), 142 (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
143 jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])), 143 (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
144 inflight[0], 144 inflight[0],
145 jiffies_to_msecs(part_stat_read(p, io_ticks)), 145 jiffies_to_msecs(part_stat_read(p, io_ticks)),
146 jiffies_to_msecs(part_stat_read(p, time_in_queue)), 146 jiffies_to_msecs(part_stat_read(p, time_in_queue)),
147 part_stat_read(p, ios[STAT_DISCARD]), 147 part_stat_read(p, ios[STAT_DISCARD]),
148 part_stat_read(p, merges[STAT_DISCARD]), 148 part_stat_read(p, merges[STAT_DISCARD]),
149 (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]), 149 (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
150 jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD]))); 150 (unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
151} 151}
152 152
153ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, 153ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index d8e159feb573..89110dfc7127 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -90,14 +90,17 @@ config EFI_ARMSTUB
90config EFI_ARMSTUB_DTB_LOADER 90config EFI_ARMSTUB_DTB_LOADER
91 bool "Enable the DTB loader" 91 bool "Enable the DTB loader"
92 depends on EFI_ARMSTUB 92 depends on EFI_ARMSTUB
93 default y
93 help 94 help
94 Select this config option to add support for the dtb= command 95 Select this config option to add support for the dtb= command
95 line parameter, allowing a device tree blob to be loaded into 96 line parameter, allowing a device tree blob to be loaded into
96 memory from the EFI System Partition by the stub. 97 memory from the EFI System Partition by the stub.
97 98
98 The device tree is typically provided by the platform or by 99 If the device tree is provided by the platform or by
 99 the bootloader, so this option is mostly for development 100 the bootloader, this option may not be needed.
100 purposes only. 101 But, for various development reasons and to maintain existing
 102 functionality for bootloaders that do not have such support,
103 this option is necessary.
101 104
102config EFI_BOOTLOADER_CONTROL 105config EFI_BOOTLOADER_CONTROL
103 tristate "EFI Bootloader Control" 106 tristate "EFI Bootloader Control"
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index e11ab12fbdf2..800986a79704 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -528,8 +528,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev,
528} 528}
529 529
530static const struct of_device_id usbhs_child_match_table[] = { 530static const struct of_device_id usbhs_child_match_table[] = {
531 { .compatible = "ti,omap-ehci", }, 531 { .compatible = "ti,ehci-omap", },
532 { .compatible = "ti,omap-ohci", }, 532 { .compatible = "ti,ohci-omap3", },
533 { } 533 { }
534}; 534};
535 535
@@ -855,6 +855,7 @@ static struct platform_driver usbhs_omap_driver = {
855 .pm = &usbhsomap_dev_pm_ops, 855 .pm = &usbhsomap_dev_pm_ops,
856 .of_match_table = usbhs_omap_dt_ids, 856 .of_match_table = usbhs_omap_dt_ids,
857 }, 857 },
858 .probe = usbhs_omap_probe,
858 .remove = usbhs_omap_remove, 859 .remove = usbhs_omap_remove,
859}; 860};
860 861
@@ -864,9 +865,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME);
864MODULE_LICENSE("GPL v2"); 865MODULE_LICENSE("GPL v2");
865MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); 866MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI");
866 867
867static int __init omap_usbhs_drvinit(void) 868static int omap_usbhs_drvinit(void)
868{ 869{
869 return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe); 870 return platform_driver_register(&usbhs_omap_driver);
870} 871}
871 872
872/* 873/*
@@ -878,7 +879,7 @@ static int __init omap_usbhs_drvinit(void)
878 */ 879 */
879fs_initcall_sync(omap_usbhs_drvinit); 880fs_initcall_sync(omap_usbhs_drvinit);
880 881
881static void __exit omap_usbhs_drvexit(void) 882static void omap_usbhs_drvexit(void)
882{ 883{
883 platform_driver_unregister(&usbhs_omap_driver); 884 platform_driver_unregister(&usbhs_omap_driver);
884} 885}
diff --git a/drivers/pinctrl/intel/pinctrl-cannonlake.c b/drivers/pinctrl/intel/pinctrl-cannonlake.c
index fb1afe55bf53..8d48371caaa2 100644
--- a/drivers/pinctrl/intel/pinctrl-cannonlake.c
+++ b/drivers/pinctrl/intel/pinctrl-cannonlake.c
@@ -379,7 +379,7 @@ static const struct intel_padgroup cnlh_community1_gpps[] = {
379static const struct intel_padgroup cnlh_community3_gpps[] = { 379static const struct intel_padgroup cnlh_community3_gpps[] = {
380 CNL_GPP(0, 155, 178, 192), /* GPP_K */ 380 CNL_GPP(0, 155, 178, 192), /* GPP_K */
381 CNL_GPP(1, 179, 202, 224), /* GPP_H */ 381 CNL_GPP(1, 179, 202, 224), /* GPP_H */
382 CNL_GPP(2, 203, 215, 258), /* GPP_E */ 382 CNL_GPP(2, 203, 215, 256), /* GPP_E */
383 CNL_GPP(3, 216, 239, 288), /* GPP_F */ 383 CNL_GPP(3, 216, 239, 288), /* GPP_F */
384 CNL_GPP(4, 240, 248, CNL_NO_GPIO), /* SPI */ 384 CNL_GPP(4, 240, 248, CNL_NO_GPIO), /* SPI */
385}; 385};
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 62b009b27eda..ec8dafc94694 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -747,13 +747,63 @@ static const struct pinctrl_desc intel_pinctrl_desc = {
747 .owner = THIS_MODULE, 747 .owner = THIS_MODULE,
748}; 748};
749 749
750/**
751 * intel_gpio_to_pin() - Translate from GPIO offset to pin number
752 * @pctrl: Pinctrl structure
753 * @offset: GPIO offset from gpiolib
754 * @commmunity: Community is filled here if not %NULL
755 * @padgrp: Pad group is filled here if not %NULL
756 *
757 * When coming through gpiolib irqchip, the GPIO offset is not
758 * automatically translated to pinctrl pin number. This function can be
759 * used to find out the corresponding pinctrl pin.
760 */
761static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset,
762 const struct intel_community **community,
763 const struct intel_padgroup **padgrp)
764{
765 int i;
766
767 for (i = 0; i < pctrl->ncommunities; i++) {
768 const struct intel_community *comm = &pctrl->communities[i];
769 int j;
770
771 for (j = 0; j < comm->ngpps; j++) {
772 const struct intel_padgroup *pgrp = &comm->gpps[j];
773
774 if (pgrp->gpio_base < 0)
775 continue;
776
777 if (offset >= pgrp->gpio_base &&
778 offset < pgrp->gpio_base + pgrp->size) {
779 int pin;
780
781 pin = pgrp->base + offset - pgrp->gpio_base;
782 if (community)
783 *community = comm;
784 if (padgrp)
785 *padgrp = pgrp;
786
787 return pin;
788 }
789 }
790 }
791
792 return -EINVAL;
793}
794
750static int intel_gpio_get(struct gpio_chip *chip, unsigned offset) 795static int intel_gpio_get(struct gpio_chip *chip, unsigned offset)
751{ 796{
752 struct intel_pinctrl *pctrl = gpiochip_get_data(chip); 797 struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
753 void __iomem *reg; 798 void __iomem *reg;
754 u32 padcfg0; 799 u32 padcfg0;
800 int pin;
801
802 pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
803 if (pin < 0)
804 return -EINVAL;
755 805
756 reg = intel_get_padcfg(pctrl, offset, PADCFG0); 806 reg = intel_get_padcfg(pctrl, pin, PADCFG0);
757 if (!reg) 807 if (!reg)
758 return -EINVAL; 808 return -EINVAL;
759 809
@@ -770,8 +820,13 @@ static void intel_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
770 unsigned long flags; 820 unsigned long flags;
771 void __iomem *reg; 821 void __iomem *reg;
772 u32 padcfg0; 822 u32 padcfg0;
823 int pin;
824
825 pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
826 if (pin < 0)
827 return;
773 828
774 reg = intel_get_padcfg(pctrl, offset, PADCFG0); 829 reg = intel_get_padcfg(pctrl, pin, PADCFG0);
775 if (!reg) 830 if (!reg)
776 return; 831 return;
777 832
@@ -790,8 +845,13 @@ static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
790 struct intel_pinctrl *pctrl = gpiochip_get_data(chip); 845 struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
791 void __iomem *reg; 846 void __iomem *reg;
792 u32 padcfg0; 847 u32 padcfg0;
848 int pin;
793 849
794 reg = intel_get_padcfg(pctrl, offset, PADCFG0); 850 pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
851 if (pin < 0)
852 return -EINVAL;
853
854 reg = intel_get_padcfg(pctrl, pin, PADCFG0);
795 if (!reg) 855 if (!reg)
796 return -EINVAL; 856 return -EINVAL;
797 857
@@ -827,51 +887,6 @@ static const struct gpio_chip intel_gpio_chip = {
827 .set_config = gpiochip_generic_config, 887 .set_config = gpiochip_generic_config,
828}; 888};
829 889
830/**
831 * intel_gpio_to_pin() - Translate from GPIO offset to pin number
832 * @pctrl: Pinctrl structure
833 * @offset: GPIO offset from gpiolib
834 * @commmunity: Community is filled here if not %NULL
835 * @padgrp: Pad group is filled here if not %NULL
836 *
837 * When coming through gpiolib irqchip, the GPIO offset is not
838 * automatically translated to pinctrl pin number. This function can be
839 * used to find out the corresponding pinctrl pin.
840 */
841static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset,
842 const struct intel_community **community,
843 const struct intel_padgroup **padgrp)
844{
845 int i;
846
847 for (i = 0; i < pctrl->ncommunities; i++) {
848 const struct intel_community *comm = &pctrl->communities[i];
849 int j;
850
851 for (j = 0; j < comm->ngpps; j++) {
852 const struct intel_padgroup *pgrp = &comm->gpps[j];
853
854 if (pgrp->gpio_base < 0)
855 continue;
856
857 if (offset >= pgrp->gpio_base &&
858 offset < pgrp->gpio_base + pgrp->size) {
859 int pin;
860
861 pin = pgrp->base + offset - pgrp->gpio_base;
862 if (community)
863 *community = comm;
864 if (padgrp)
865 *padgrp = pgrp;
866
867 return pin;
868 }
869 }
870 }
871
872 return -EINVAL;
873}
874
875static int intel_gpio_irq_reqres(struct irq_data *d) 890static int intel_gpio_irq_reqres(struct irq_data *d)
876{ 891{
877 struct gpio_chip *gc = irq_data_get_irq_chip_data(d); 892 struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
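
The translation done by intel_gpio_to_pin() (moved above the GPIO accessors so they can use it) is linear within a pad group. With hypothetical values — a group with gpio_base = 100, base = 150 and size = 24 — GPIO offset 110 falls inside the group and maps to pin 150 + 110 - 100 = 160, and that pin number, not the raw gpiolib offset, is what intel_get_padcfg() is now called with in the get/set/get_direction paths above.
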
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7bafa703a992..84575baceebc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1040,18 +1040,33 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
1040 return ret; 1040 return ret;
1041 1041
1042 for (i = 0; i < count; i++) { 1042 for (i = 0; i < count; i++) {
1043 /* Retry eagain maps */ 1043 switch (map_ops[i].status) {
1044 if (map_ops[i].status == GNTST_eagain) 1044 case GNTST_okay:
1045 gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, 1045 {
1046 &map_ops[i].status, __func__);
1047
1048 if (map_ops[i].status == GNTST_okay) {
1049 struct xen_page_foreign *foreign; 1046 struct xen_page_foreign *foreign;
1050 1047
1051 SetPageForeign(pages[i]); 1048 SetPageForeign(pages[i]);
1052 foreign = xen_page_foreign(pages[i]); 1049 foreign = xen_page_foreign(pages[i]);
1053 foreign->domid = map_ops[i].dom; 1050 foreign->domid = map_ops[i].dom;
1054 foreign->gref = map_ops[i].ref; 1051 foreign->gref = map_ops[i].ref;
1052 break;
1053 }
1054
1055 case GNTST_no_device_space:
1056 pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
1057 break;
1058
1059 case GNTST_eagain:
1060 /* Retry eagain maps */
1061 gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
1062 map_ops + i,
1063 &map_ops[i].status, __func__);
1064 /* Test status in next loop iteration. */
1065 i--;
1066 break;
1067
1068 default:
1069 break;
1055 } 1070 }
1056 } 1071 }
1057 1072
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 57864422a2c8..25c08c6c7f99 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -83,10 +83,10 @@ struct partition {
83} __attribute__((packed)); 83} __attribute__((packed));
84 84
85struct disk_stats { 85struct disk_stats {
86 u64 nsecs[NR_STAT_GROUPS];
86 unsigned long sectors[NR_STAT_GROUPS]; 87 unsigned long sectors[NR_STAT_GROUPS];
87 unsigned long ios[NR_STAT_GROUPS]; 88 unsigned long ios[NR_STAT_GROUPS];
88 unsigned long merges[NR_STAT_GROUPS]; 89 unsigned long merges[NR_STAT_GROUPS];
89 unsigned long ticks[NR_STAT_GROUPS];
90 unsigned long io_ticks; 90 unsigned long io_ticks;
91 unsigned long time_in_queue; 91 unsigned long time_in_queue;
92}; 92};
@@ -354,6 +354,9 @@ static inline void free_part_stats(struct hd_struct *part)
354 354
355#endif /* CONFIG_SMP */ 355#endif /* CONFIG_SMP */
356 356
357#define part_stat_read_msecs(part, which) \
358 div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
359
357#define part_stat_read_accum(part, field) \ 360#define part_stat_read_accum(part, field) \
358 (part_stat_read(part, field[STAT_READ]) + \ 361 (part_stat_read(part, field[STAT_READ]) + \
359 part_stat_read(part, field[STAT_WRITE]) + \ 362 part_stat_read(part, field[STAT_WRITE]) + \
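
Per-partition I/O time is now accumulated in nanoseconds (the u64 nsecs[] array) instead of jiffies, and the new part_stat_read_msecs() helper converts on read: a partition with nsecs[STAT_WRITE] == 2,500,000,000 reports 2500 ms in /proc/diskstats and the sysfs stat file, independent of HZ.
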
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 8bdbb5f29494..74b0aa9c7499 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -357,6 +357,8 @@
357#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) 357#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
358#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) 358#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
359 359
360#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
361
360#define GITS_BASER_NR_REGS 8 362#define GITS_BASER_NR_REGS 8
361 363
362#define GITS_BASER_VALID (1ULL << 63) 364#define GITS_BASER_VALID (1ULL << 63)
@@ -388,6 +390,9 @@
388#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) 390#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
389#define GITS_BASER_PHYS_52_to_48(phys) \ 391#define GITS_BASER_PHYS_52_to_48(phys) \
390 (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) 392 (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
393#define GITS_BASER_ADDR_48_to_52(baser) \
394 (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
395
391#define GITS_BASER_SHAREABILITY_SHIFT (10) 396#define GITS_BASER_SHAREABILITY_SHIFT (10)
392#define GITS_BASER_InnerShareable \ 397#define GITS_BASER_InnerShareable \
393 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) 398 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
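
A worked example of the 52-bit address packing these macros implement: a table at physical address 0x000f_0000_0001_0000 is programmed as GITS_BASER_PHYS_52_to_48(addr) == 0x1f000, i.e. address bits [51:48] land in BASER bits [15:12] while bits [47:16] stay in place, and the new GITS_BASER_ADDR_48_to_52() reverses the packing, recovering 0x000f_0000_0001_0000 from that register value.
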
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
index 8a125701ef7b..50bed4f89c1a 100644
--- a/include/linux/mfd/da9063/pdata.h
+++ b/include/linux/mfd/da9063/pdata.h
@@ -21,7 +21,7 @@
21/* 21/*
22 * Regulator configuration 22 * Regulator configuration
23 */ 23 */
24/* DA9063 regulator IDs */ 24/* DA9063 and DA9063L regulator IDs */
25enum { 25enum {
26 /* BUCKs */ 26 /* BUCKs */
27 DA9063_ID_BCORE1, 27 DA9063_ID_BCORE1,
@@ -37,18 +37,20 @@ enum {
37 DA9063_ID_BMEM_BIO_MERGED, 37 DA9063_ID_BMEM_BIO_MERGED,
38 /* When two BUCKs are merged, they cannot be reused separately */ 38 /* When two BUCKs are merged, they cannot be reused separately */
39 39
40 /* LDOs */ 40 /* LDOs on both DA9063 and DA9063L */
41 DA9063_ID_LDO3,
42 DA9063_ID_LDO7,
43 DA9063_ID_LDO8,
44 DA9063_ID_LDO9,
45 DA9063_ID_LDO11,
46
47 /* DA9063-only LDOs */
41 DA9063_ID_LDO1, 48 DA9063_ID_LDO1,
42 DA9063_ID_LDO2, 49 DA9063_ID_LDO2,
43 DA9063_ID_LDO3,
44 DA9063_ID_LDO4, 50 DA9063_ID_LDO4,
45 DA9063_ID_LDO5, 51 DA9063_ID_LDO5,
46 DA9063_ID_LDO6, 52 DA9063_ID_LDO6,
47 DA9063_ID_LDO7,
48 DA9063_ID_LDO8,
49 DA9063_ID_LDO9,
50 DA9063_ID_LDO10, 53 DA9063_ID_LDO10,
51 DA9063_ID_LDO11,
52}; 54};
53 55
54/* Regulators platform data */ 56/* Regulators platform data */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cb6d44e1fe02..2b7a652c9fa4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -758,6 +758,15 @@ struct kvm_ppc_resize_hpt {
758#define KVM_S390_SIE_PAGE_OFFSET 1 758#define KVM_S390_SIE_PAGE_OFFSET 1
759 759
760/* 760/*
761 * On arm64, machine type can be used to request the physical
 762 * address size for the VM. Bits [7:0] are reserved for the guest
 763 * PA size shift (i.e., log2(PA_Size)). For backward compatibility,
 764 * a value of 0 implies the default IPA size, 40 bits.
765 */
766#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL
767#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \
768 ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
769/*
761 * ioctls for /dev/kvm fds: 770 * ioctls for /dev/kvm fds:
762 */ 771 */
763#define KVM_GET_API_VERSION _IO(KVMIO, 0x00) 772#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
@@ -965,6 +974,7 @@ struct kvm_ppc_resize_hpt {
965#define KVM_CAP_COALESCED_PIO 162 974#define KVM_CAP_COALESCED_PIO 162
966#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 975#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
967#define KVM_CAP_EXCEPTION_PAYLOAD 164 976#define KVM_CAP_EXCEPTION_PAYLOAD 164
977#define KVM_CAP_ARM_VM_IPA_SIZE 165
968 978
969#ifdef KVM_CAP_IRQ_ROUTING 979#ifdef KVM_CAP_IRQ_ROUTING
970 980
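For reference, a hedged userspace sketch of how the new machine-type encoding might be used: query KVM_CAP_ARM_VM_IPA_SIZE for the maximum supported IPA width and pass it to KVM_CREATE_VM. It assumes uapi headers that already carry this merge; error handling is minimal.

/* Sketch: create an arm64 VM with the largest IPA space the host reports. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Returns the maximum IPA shift, or 0 if the capability is absent. */
	int max_ipa = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);

	/* Type 0 keeps the backward-compatible 40-bit default. */
	unsigned long type = max_ipa > 0 ? KVM_VM_TYPE_ARM_IPA_SIZE(max_ipa) : 0;

	int vm = ioctl(kvm, KVM_CREATE_VM, type);
	if (vm < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}
	return 0;
}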
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 13a861135127..6eb9bacd1948 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2abd0f112627..bdb94939fd60 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -50,6 +50,7 @@
50#include "libbpf.h" 50#include "libbpf.h"
51#include "bpf.h" 51#include "bpf.h"
52#include "btf.h" 52#include "btf.h"
53#include "str_error.h"
53 54
54#ifndef EM_BPF 55#ifndef EM_BPF
55#define EM_BPF 247 56#define EM_BPF 247
@@ -469,7 +470,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
469 obj->efile.fd = open(obj->path, O_RDONLY); 470 obj->efile.fd = open(obj->path, O_RDONLY);
470 if (obj->efile.fd < 0) { 471 if (obj->efile.fd < 0) {
471 char errmsg[STRERR_BUFSIZE]; 472 char errmsg[STRERR_BUFSIZE];
472 char *cp = strerror_r(errno, errmsg, sizeof(errmsg)); 473 char *cp = str_error(errno, errmsg, sizeof(errmsg));
473 474
474 pr_warning("failed to open %s: %s\n", obj->path, cp); 475 pr_warning("failed to open %s: %s\n", obj->path, cp);
475 return -errno; 476 return -errno;
@@ -810,8 +811,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
810 data->d_size, name, idx); 811 data->d_size, name, idx);
811 if (err) { 812 if (err) {
812 char errmsg[STRERR_BUFSIZE]; 813 char errmsg[STRERR_BUFSIZE];
813 char *cp = strerror_r(-err, errmsg, 814 char *cp = str_error(-err, errmsg, sizeof(errmsg));
814 sizeof(errmsg));
815 815
816 pr_warning("failed to alloc program %s (%s): %s", 816 pr_warning("failed to alloc program %s (%s): %s",
817 name, obj->path, cp); 817 name, obj->path, cp);
@@ -1140,7 +1140,7 @@ bpf_object__create_maps(struct bpf_object *obj)
1140 1140
1141 *pfd = bpf_create_map_xattr(&create_attr); 1141 *pfd = bpf_create_map_xattr(&create_attr);
1142 if (*pfd < 0 && create_attr.btf_key_type_id) { 1142 if (*pfd < 0 && create_attr.btf_key_type_id) {
1143 cp = strerror_r(errno, errmsg, sizeof(errmsg)); 1143 cp = str_error(errno, errmsg, sizeof(errmsg));
1144 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", 1144 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
1145 map->name, cp, errno); 1145 map->name, cp, errno);
1146 create_attr.btf_fd = 0; 1146 create_attr.btf_fd = 0;
@@ -1155,7 +1155,7 @@ bpf_object__create_maps(struct bpf_object *obj)
1155 size_t j; 1155 size_t j;
1156 1156
1157 err = *pfd; 1157 err = *pfd;
1158 cp = strerror_r(errno, errmsg, sizeof(errmsg)); 1158 cp = str_error(errno, errmsg, sizeof(errmsg));
1159 pr_warning("failed to create map (name: '%s'): %s\n", 1159 pr_warning("failed to create map (name: '%s'): %s\n",
1160 map->name, cp); 1160 map->name, cp);
1161 for (j = 0; j < i; j++) 1161 for (j = 0; j < i; j++)
@@ -1339,7 +1339,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1339 } 1339 }
1340 1340
1341 ret = -LIBBPF_ERRNO__LOAD; 1341 ret = -LIBBPF_ERRNO__LOAD;
1342 cp = strerror_r(errno, errmsg, sizeof(errmsg)); 1342 cp = str_error(errno, errmsg, sizeof(errmsg));
1343 pr_warning("load bpf program failed: %s\n", cp); 1343 pr_warning("load bpf program failed: %s\n", cp);
1344 1344
1345 if (log_buf && log_buf[0] != '\0') { 1345 if (log_buf && log_buf[0] != '\0') {
@@ -1654,7 +1654,7 @@ static int check_path(const char *path)
1654 1654
1655 dir = dirname(dname); 1655 dir = dirname(dname);
1656 if (statfs(dir, &st_fs)) { 1656 if (statfs(dir, &st_fs)) {
1657 cp = strerror_r(errno, errmsg, sizeof(errmsg)); 1657 cp = str_error(errno, errmsg, sizeof(errmsg));
1658 pr_warning("failed to statfs %s: %s\n", dir, cp); 1658 pr_warning("failed to statfs %s: %s\n", dir, cp);
1659 err = -errno; 1659 err = -errno;
1660 } 1660 }
@@ -1690,7 +1690,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
1690 } 1690 }
1691 1691
1692 if (bpf_obj_pin(prog->instances.fds[instance], path)) { 1692 if (bpf_obj_pin(prog->instances.fds[instance], path)) {
1693 cp = strerror_r(errno, errmsg, sizeof(errmsg)); 1693 cp = str_error(errno, errmsg, sizeof(errmsg));
1694 pr_warning("failed to pin program: %s\n", cp); 1694 pr_warning("failed to pin program: %s\n", cp);
1695 return -errno; 1695 return -errno;
1696 } 1696 }
@@ -1708,7 +1708,7 @@ static int make_dir(const char *path)
1708 err = -errno; 1708 err = -errno;
1709 1709
1710 if (err) { 1710 if (err) {
1711 cp = strerror_r(-err, errmsg, sizeof(errmsg)); 1711 cp = str_error(-err, errmsg, sizeof(errmsg));
1712 pr_warning("failed to mkdir %s: %s\n", path, cp); 1712 pr_warning("failed to mkdir %s: %s\n", path, cp);
1713 } 1713 }
1714 return err; 1714 return err;
@@ -1770,7 +1770,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
1770 } 1770 }
1771 1771
1772 if (bpf_obj_pin(map->fd, path)) { 1772 if (bpf_obj_pin(map->fd, path)) {
1773 cp = strerror_r(errno, errmsg, sizeof(errmsg)); 1773 cp = str_error(errno, errmsg, sizeof(errmsg));
1774 pr_warning("failed to pin map: %s\n", cp); 1774 pr_warning("failed to pin map: %s\n", cp);
1775 return -errno; 1775 return -errno;
1776 } 1776 }
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
new file mode 100644
index 000000000000..b8798114a357
--- /dev/null
+++ b/tools/lib/bpf/str_error.c
@@ -0,0 +1,18 @@
1// SPDX-License-Identifier: LGPL-2.1
2#undef _GNU_SOURCE
3#include <string.h>
4#include <stdio.h>
5#include "str_error.h"
6
7/*
 8 * Wrapper to allow building on non-GNU systems such as Alpine Linux's musl
 9 * libc, while checking the strerror_r() return value to avoid having to
 10 * check it at every call site.
11 */
12char *str_error(int err, char *dst, int len)
13{
14 int ret = strerror_r(err, dst, len);
15 if (ret)
16 snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
17 return dst;
18}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
new file mode 100644
index 000000000000..355b1db571d1
--- /dev/null
+++ b/tools/lib/bpf/str_error.h
@@ -0,0 +1,6 @@
1// SPDX-License-Identifier: LGPL-2.1
2#ifndef BPF_STR_ERROR
3#define BPF_STR_ERROR
4
5char *str_error(int err, char *dst, int len);
6#endif // BPF_STR_ERROR
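A short caller-side sketch of the new helper, mirroring how libbpf.c uses it after this change; STRERR_BUFSIZE and report_open_failure() are illustrative names here, not part of the library API.

/* Sketch of typical usage; behaves the same against GNU or musl strerror_r(). */
#include <errno.h>
#include <stdio.h>
#include "str_error.h"

#define STRERR_BUFSIZE 128

int report_open_failure(const char *path)
{
	char errmsg[STRERR_BUFSIZE];
	char *cp = str_error(errno, errmsg, sizeof(errmsg));

	fprintf(stderr, "failed to open %s: %s\n", path, cp);
	return -errno;
}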
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 42261a9b280e..ac841bc5c35b 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -280,7 +280,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
280 mv $@+ $@ 280 mv $@+ $@
281 281
282ifdef USE_ASCIIDOCTOR 282ifdef USE_ASCIIDOCTOR
283$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt 283$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
284 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 284 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
285 $(ASCIIDOC) -b manpage -d manpage \ 285 $(ASCIIDOC) -b manpage -d manpage \
286 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 286 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index c92053bc3f96..11b98b2b0486 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
120{ 120{
121 int ret, cpu; 121 int ret, cpu;
122 122
123 if (type) 123 ret = kvm_arm_setup_stage2(kvm, type);
124 return -EINVAL; 124 if (ret)
125 return ret;
125 126
126 kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); 127 kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
127 if (!kvm->arch.last_vcpu_ran) 128 if (!kvm->arch.last_vcpu_ran)
@@ -212,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
212 case KVM_CAP_READONLY_MEM: 213 case KVM_CAP_READONLY_MEM:
213 case KVM_CAP_MP_STATE: 214 case KVM_CAP_MP_STATE:
214 case KVM_CAP_IMMEDIATE_EXIT: 215 case KVM_CAP_IMMEDIATE_EXIT:
216 case KVM_CAP_VCPU_EVENTS:
215 r = 1; 217 r = 1;
216 break; 218 break;
217 case KVM_CAP_ARM_SET_DEVICE_ADDR: 219 case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -240,7 +242,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
240 r = 1; 242 r = 1;
241 break; 243 break;
242 default: 244 default:
243 r = kvm_arch_dev_ioctl_check_extension(kvm, ext); 245 r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
244 break; 246 break;
245 } 247 }
246 return r; 248 return r;
@@ -544,7 +546,7 @@ static void update_vttbr(struct kvm *kvm)
544 546
545 /* update vttbr to be used with the new vmid */ 547 /* update vttbr to be used with the new vmid */
546 pgd_phys = virt_to_phys(kvm->arch.pgd); 548 pgd_phys = virt_to_phys(kvm->arch.pgd);
547 BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); 549 BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
548 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); 550 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
549 kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; 551 kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
550 552
@@ -1295,8 +1297,6 @@ static void cpu_init_hyp_mode(void *dummy)
1295 1297
1296 __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); 1298 __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
1297 __cpu_init_stage2(); 1299 __cpu_init_stage2();
1298
1299 kvm_arm_init_debug();
1300} 1300}
1301 1301
1302static void cpu_hyp_reset(void) 1302static void cpu_hyp_reset(void)
@@ -1309,16 +1309,12 @@ static void cpu_hyp_reinit(void)
1309{ 1309{
1310 cpu_hyp_reset(); 1310 cpu_hyp_reset();
1311 1311
1312 if (is_kernel_in_hyp_mode()) { 1312 if (is_kernel_in_hyp_mode())
1313 /*
1314 * __cpu_init_stage2() is safe to call even if the PM
1315 * event was cancelled before the CPU was reset.
1316 */
1317 __cpu_init_stage2();
1318 kvm_timer_init_vhe(); 1313 kvm_timer_init_vhe();
1319 } else { 1314 else
1320 cpu_init_hyp_mode(NULL); 1315 cpu_init_hyp_mode(NULL);
1321 } 1316
1317 kvm_arm_init_debug();
1322 1318
1323 if (vgic_present) 1319 if (vgic_present)
1324 kvm_vgic_init_cpu_hardware(); 1320 kvm_vgic_init_cpu_hardware();
@@ -1412,6 +1408,8 @@ static int init_common_resources(void)
1412 kvm_vmid_bits = kvm_get_vmid_bits(); 1408 kvm_vmid_bits = kvm_get_vmid_bits();
1413 kvm_info("%d-bit VMID\n", kvm_vmid_bits); 1409 kvm_info("%d-bit VMID\n", kvm_vmid_bits);
1414 1410
1411 kvm_set_ipa_limit();
1412
1415 return 0; 1413 return 0;
1416} 1414}
1417 1415
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..c23a1b323aad 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector;
45 45
46static unsigned long io_map_base; 46static unsigned long io_map_base;
47 47
48#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t))
49#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) 48#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
50 49
51#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) 50#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
@@ -150,20 +149,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
150 149
151static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) 150static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
152{ 151{
153 pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); 152 pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL);
154 stage2_pgd_clear(pgd); 153 stage2_pgd_clear(kvm, pgd);
155 kvm_tlb_flush_vmid_ipa(kvm, addr); 154 kvm_tlb_flush_vmid_ipa(kvm, addr);
156 stage2_pud_free(pud_table); 155 stage2_pud_free(kvm, pud_table);
157 put_page(virt_to_page(pgd)); 156 put_page(virt_to_page(pgd));
158} 157}
159 158
160static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 159static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
161{ 160{
162 pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); 161 pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
163 VM_BUG_ON(stage2_pud_huge(*pud)); 162 VM_BUG_ON(stage2_pud_huge(kvm, *pud));
164 stage2_pud_clear(pud); 163 stage2_pud_clear(kvm, pud);
165 kvm_tlb_flush_vmid_ipa(kvm, addr); 164 kvm_tlb_flush_vmid_ipa(kvm, addr);
166 stage2_pmd_free(pmd_table); 165 stage2_pmd_free(kvm, pmd_table);
167 put_page(virt_to_page(pud)); 166 put_page(virt_to_page(pud));
168} 167}
169 168
@@ -252,7 +251,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
252 } 251 }
253 } while (pte++, addr += PAGE_SIZE, addr != end); 252 } while (pte++, addr += PAGE_SIZE, addr != end);
254 253
255 if (stage2_pte_table_empty(start_pte)) 254 if (stage2_pte_table_empty(kvm, start_pte))
256 clear_stage2_pmd_entry(kvm, pmd, start_addr); 255 clear_stage2_pmd_entry(kvm, pmd, start_addr);
257} 256}
258 257
@@ -262,9 +261,9 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
262 phys_addr_t next, start_addr = addr; 261 phys_addr_t next, start_addr = addr;
263 pmd_t *pmd, *start_pmd; 262 pmd_t *pmd, *start_pmd;
264 263
265 start_pmd = pmd = stage2_pmd_offset(pud, addr); 264 start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr);
266 do { 265 do {
267 next = stage2_pmd_addr_end(addr, end); 266 next = stage2_pmd_addr_end(kvm, addr, end);
268 if (!pmd_none(*pmd)) { 267 if (!pmd_none(*pmd)) {
269 if (pmd_thp_or_huge(*pmd)) { 268 if (pmd_thp_or_huge(*pmd)) {
270 pmd_t old_pmd = *pmd; 269 pmd_t old_pmd = *pmd;
@@ -281,7 +280,7 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
281 } 280 }
282 } while (pmd++, addr = next, addr != end); 281 } while (pmd++, addr = next, addr != end);
283 282
284 if (stage2_pmd_table_empty(start_pmd)) 283 if (stage2_pmd_table_empty(kvm, start_pmd))
285 clear_stage2_pud_entry(kvm, pud, start_addr); 284 clear_stage2_pud_entry(kvm, pud, start_addr);
286} 285}
287 286
@@ -291,14 +290,14 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
291 phys_addr_t next, start_addr = addr; 290 phys_addr_t next, start_addr = addr;
292 pud_t *pud, *start_pud; 291 pud_t *pud, *start_pud;
293 292
294 start_pud = pud = stage2_pud_offset(pgd, addr); 293 start_pud = pud = stage2_pud_offset(kvm, pgd, addr);
295 do { 294 do {
296 next = stage2_pud_addr_end(addr, end); 295 next = stage2_pud_addr_end(kvm, addr, end);
297 if (!stage2_pud_none(*pud)) { 296 if (!stage2_pud_none(kvm, *pud)) {
298 if (stage2_pud_huge(*pud)) { 297 if (stage2_pud_huge(kvm, *pud)) {
299 pud_t old_pud = *pud; 298 pud_t old_pud = *pud;
300 299
301 stage2_pud_clear(pud); 300 stage2_pud_clear(kvm, pud);
302 kvm_tlb_flush_vmid_ipa(kvm, addr); 301 kvm_tlb_flush_vmid_ipa(kvm, addr);
303 kvm_flush_dcache_pud(old_pud); 302 kvm_flush_dcache_pud(old_pud);
304 put_page(virt_to_page(pud)); 303 put_page(virt_to_page(pud));
@@ -308,7 +307,7 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
308 } 307 }
309 } while (pud++, addr = next, addr != end); 308 } while (pud++, addr = next, addr != end);
310 309
311 if (stage2_pud_table_empty(start_pud)) 310 if (stage2_pud_table_empty(kvm, start_pud))
312 clear_stage2_pgd_entry(kvm, pgd, start_addr); 311 clear_stage2_pgd_entry(kvm, pgd, start_addr);
313} 312}
314 313
@@ -332,7 +331,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
332 assert_spin_locked(&kvm->mmu_lock); 331 assert_spin_locked(&kvm->mmu_lock);
333 WARN_ON(size & ~PAGE_MASK); 332 WARN_ON(size & ~PAGE_MASK);
334 333
335 pgd = kvm->arch.pgd + stage2_pgd_index(addr); 334 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
336 do { 335 do {
337 /* 336 /*
338 * Make sure the page table is still active, as another thread 337 * Make sure the page table is still active, as another thread
@@ -341,8 +340,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
341 */ 340 */
342 if (!READ_ONCE(kvm->arch.pgd)) 341 if (!READ_ONCE(kvm->arch.pgd))
343 break; 342 break;
344 next = stage2_pgd_addr_end(addr, end); 343 next = stage2_pgd_addr_end(kvm, addr, end);
345 if (!stage2_pgd_none(*pgd)) 344 if (!stage2_pgd_none(kvm, *pgd))
346 unmap_stage2_puds(kvm, pgd, addr, next); 345 unmap_stage2_puds(kvm, pgd, addr, next);
347 /* 346 /*
348 * If the range is too large, release the kvm->mmu_lock 347 * If the range is too large, release the kvm->mmu_lock
@@ -371,9 +370,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
371 pmd_t *pmd; 370 pmd_t *pmd;
372 phys_addr_t next; 371 phys_addr_t next;
373 372
374 pmd = stage2_pmd_offset(pud, addr); 373 pmd = stage2_pmd_offset(kvm, pud, addr);
375 do { 374 do {
376 next = stage2_pmd_addr_end(addr, end); 375 next = stage2_pmd_addr_end(kvm, addr, end);
377 if (!pmd_none(*pmd)) { 376 if (!pmd_none(*pmd)) {
378 if (pmd_thp_or_huge(*pmd)) 377 if (pmd_thp_or_huge(*pmd))
379 kvm_flush_dcache_pmd(*pmd); 378 kvm_flush_dcache_pmd(*pmd);
@@ -389,11 +388,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
389 pud_t *pud; 388 pud_t *pud;
390 phys_addr_t next; 389 phys_addr_t next;
391 390
392 pud = stage2_pud_offset(pgd, addr); 391 pud = stage2_pud_offset(kvm, pgd, addr);
393 do { 392 do {
394 next = stage2_pud_addr_end(addr, end); 393 next = stage2_pud_addr_end(kvm, addr, end);
395 if (!stage2_pud_none(*pud)) { 394 if (!stage2_pud_none(kvm, *pud)) {
396 if (stage2_pud_huge(*pud)) 395 if (stage2_pud_huge(kvm, *pud))
397 kvm_flush_dcache_pud(*pud); 396 kvm_flush_dcache_pud(*pud);
398 else 397 else
399 stage2_flush_pmds(kvm, pud, addr, next); 398 stage2_flush_pmds(kvm, pud, addr, next);
@@ -409,10 +408,11 @@ static void stage2_flush_memslot(struct kvm *kvm,
409 phys_addr_t next; 408 phys_addr_t next;
410 pgd_t *pgd; 409 pgd_t *pgd;
411 410
412 pgd = kvm->arch.pgd + stage2_pgd_index(addr); 411 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
413 do { 412 do {
414 next = stage2_pgd_addr_end(addr, end); 413 next = stage2_pgd_addr_end(kvm, addr, end);
415 stage2_flush_puds(kvm, pgd, addr, next); 414 if (!stage2_pgd_none(kvm, *pgd))
415 stage2_flush_puds(kvm, pgd, addr, next);
416 } while (pgd++, addr = next, addr != end); 416 } while (pgd++, addr = next, addr != end);
417} 417}
418 418
@@ -897,7 +897,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
897 } 897 }
898 898
899 /* Allocate the HW PGD, making sure that each page gets its own refcount */ 899 /* Allocate the HW PGD, making sure that each page gets its own refcount */
900 pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); 900 pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO);
901 if (!pgd) 901 if (!pgd)
902 return -ENOMEM; 902 return -ENOMEM;
903 903
@@ -986,7 +986,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
986 986
987 spin_lock(&kvm->mmu_lock); 987 spin_lock(&kvm->mmu_lock);
988 if (kvm->arch.pgd) { 988 if (kvm->arch.pgd) {
989 unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); 989 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
990 pgd = READ_ONCE(kvm->arch.pgd); 990 pgd = READ_ONCE(kvm->arch.pgd);
991 kvm->arch.pgd = NULL; 991 kvm->arch.pgd = NULL;
992 } 992 }
@@ -994,7 +994,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
994 994
995 /* Free the HW pgd, one page at a time */ 995 /* Free the HW pgd, one page at a time */
996 if (pgd) 996 if (pgd)
997 free_pages_exact(pgd, S2_PGD_SIZE); 997 free_pages_exact(pgd, stage2_pgd_size(kvm));
998} 998}
999 999
1000static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1000static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -1003,16 +1003,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
1003 pgd_t *pgd; 1003 pgd_t *pgd;
1004 pud_t *pud; 1004 pud_t *pud;
1005 1005
1006 pgd = kvm->arch.pgd + stage2_pgd_index(addr); 1006 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
1007 if (WARN_ON(stage2_pgd_none(*pgd))) { 1007 if (stage2_pgd_none(kvm, *pgd)) {
1008 if (!cache) 1008 if (!cache)
1009 return NULL; 1009 return NULL;
1010 pud = mmu_memory_cache_alloc(cache); 1010 pud = mmu_memory_cache_alloc(cache);
1011 stage2_pgd_populate(pgd, pud); 1011 stage2_pgd_populate(kvm, pgd, pud);
1012 get_page(virt_to_page(pgd)); 1012 get_page(virt_to_page(pgd));
1013 } 1013 }
1014 1014
1015 return stage2_pud_offset(pgd, addr); 1015 return stage2_pud_offset(kvm, pgd, addr);
1016} 1016}
1017 1017
1018static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1018static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -1025,15 +1025,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
1025 if (!pud) 1025 if (!pud)
1026 return NULL; 1026 return NULL;
1027 1027
1028 if (stage2_pud_none(*pud)) { 1028 if (stage2_pud_none(kvm, *pud)) {
1029 if (!cache) 1029 if (!cache)
1030 return NULL; 1030 return NULL;
1031 pmd = mmu_memory_cache_alloc(cache); 1031 pmd = mmu_memory_cache_alloc(cache);
1032 stage2_pud_populate(pud, pmd); 1032 stage2_pud_populate(kvm, pud, pmd);
1033 get_page(virt_to_page(pud)); 1033 get_page(virt_to_page(pud));
1034 } 1034 }
1035 1035
1036 return stage2_pmd_offset(pud, addr); 1036 return stage2_pmd_offset(kvm, pud, addr);
1037} 1037}
1038 1038
1039static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache 1039static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -1207,8 +1207,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
1207 if (writable) 1207 if (writable)
1208 pte = kvm_s2pte_mkwrite(pte); 1208 pte = kvm_s2pte_mkwrite(pte);
1209 1209
1210 ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, 1210 ret = mmu_topup_memory_cache(&cache,
1211 KVM_NR_MEM_OBJS); 1211 kvm_mmu_cache_min_pages(kvm),
1212 KVM_NR_MEM_OBJS);
1212 if (ret) 1213 if (ret)
1213 goto out; 1214 goto out;
1214 spin_lock(&kvm->mmu_lock); 1215 spin_lock(&kvm->mmu_lock);
@@ -1230,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
1230{ 1231{
1231 kvm_pfn_t pfn = *pfnp; 1232 kvm_pfn_t pfn = *pfnp;
1232 gfn_t gfn = *ipap >> PAGE_SHIFT; 1233 gfn_t gfn = *ipap >> PAGE_SHIFT;
1234 struct page *page = pfn_to_page(pfn);
1233 1235
1234 if (PageTransCompoundMap(pfn_to_page(pfn))) { 1236 /*
 1237 * PageTransCompoundMap() returns true for THP and
1238 * hugetlbfs. Make sure the adjustment is done only for THP
1239 * pages.
1240 */
1241 if (!PageHuge(page) && PageTransCompoundMap(page)) {
1235 unsigned long mask; 1242 unsigned long mask;
1236 /* 1243 /*
1237 * The address we faulted on is backed by a transparent huge 1244 * The address we faulted on is backed by a transparent huge
@@ -1296,19 +1303,21 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
1296 1303
1297/** 1304/**
1298 * stage2_wp_pmds - write protect PUD range 1305 * stage2_wp_pmds - write protect PUD range
 1306 * @kvm: kvm instance for the VM
1299 * @pud: pointer to pud entry 1307 * @pud: pointer to pud entry
1300 * @addr: range start address 1308 * @addr: range start address
1301 * @end: range end address 1309 * @end: range end address
1302 */ 1310 */
1303static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) 1311static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
1312 phys_addr_t addr, phys_addr_t end)
1304{ 1313{
1305 pmd_t *pmd; 1314 pmd_t *pmd;
1306 phys_addr_t next; 1315 phys_addr_t next;
1307 1316
1308 pmd = stage2_pmd_offset(pud, addr); 1317 pmd = stage2_pmd_offset(kvm, pud, addr);
1309 1318
1310 do { 1319 do {
1311 next = stage2_pmd_addr_end(addr, end); 1320 next = stage2_pmd_addr_end(kvm, addr, end);
1312 if (!pmd_none(*pmd)) { 1321 if (!pmd_none(*pmd)) {
1313 if (pmd_thp_or_huge(*pmd)) { 1322 if (pmd_thp_or_huge(*pmd)) {
1314 if (!kvm_s2pmd_readonly(pmd)) 1323 if (!kvm_s2pmd_readonly(pmd))
@@ -1328,18 +1337,19 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
1328 * 1337 *
1329 * Process PUD entries, for a huge PUD we cause a panic. 1338 * Process PUD entries, for a huge PUD we cause a panic.
1330 */ 1339 */
1331static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) 1340static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
1341 phys_addr_t addr, phys_addr_t end)
1332{ 1342{
1333 pud_t *pud; 1343 pud_t *pud;
1334 phys_addr_t next; 1344 phys_addr_t next;
1335 1345
1336 pud = stage2_pud_offset(pgd, addr); 1346 pud = stage2_pud_offset(kvm, pgd, addr);
1337 do { 1347 do {
1338 next = stage2_pud_addr_end(addr, end); 1348 next = stage2_pud_addr_end(kvm, addr, end);
1339 if (!stage2_pud_none(*pud)) { 1349 if (!stage2_pud_none(kvm, *pud)) {
1340 /* TODO:PUD not supported, revisit later if supported */ 1350 /* TODO:PUD not supported, revisit later if supported */
1341 BUG_ON(stage2_pud_huge(*pud)); 1351 BUG_ON(stage2_pud_huge(kvm, *pud));
1342 stage2_wp_pmds(pud, addr, next); 1352 stage2_wp_pmds(kvm, pud, addr, next);
1343 } 1353 }
1344 } while (pud++, addr = next, addr != end); 1354 } while (pud++, addr = next, addr != end);
1345} 1355}
@@ -1355,7 +1365,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1355 pgd_t *pgd; 1365 pgd_t *pgd;
1356 phys_addr_t next; 1366 phys_addr_t next;
1357 1367
1358 pgd = kvm->arch.pgd + stage2_pgd_index(addr); 1368 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
1359 do { 1369 do {
1360 /* 1370 /*
1361 * Release kvm_mmu_lock periodically if the memory region is 1371 * Release kvm_mmu_lock periodically if the memory region is
@@ -1369,9 +1379,9 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1369 cond_resched_lock(&kvm->mmu_lock); 1379 cond_resched_lock(&kvm->mmu_lock);
1370 if (!READ_ONCE(kvm->arch.pgd)) 1380 if (!READ_ONCE(kvm->arch.pgd))
1371 break; 1381 break;
1372 next = stage2_pgd_addr_end(addr, end); 1382 next = stage2_pgd_addr_end(kvm, addr, end);
1373 if (stage2_pgd_present(*pgd)) 1383 if (stage2_pgd_present(kvm, *pgd))
1374 stage2_wp_puds(pgd, addr, next); 1384 stage2_wp_puds(kvm, pgd, addr, next);
1375 } while (pgd++, addr = next, addr != end); 1385 } while (pgd++, addr = next, addr != end);
1376} 1386}
1377 1387
@@ -1520,7 +1530,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1520 up_read(&current->mm->mmap_sem); 1530 up_read(&current->mm->mmap_sem);
1521 1531
1522 /* We need minimum second+third level pages */ 1532 /* We need minimum second+third level pages */
1523 ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, 1533 ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
1524 KVM_NR_MEM_OBJS); 1534 KVM_NR_MEM_OBJS);
1525 if (ret) 1535 if (ret)
1526 return ret; 1536 return ret;
@@ -1763,7 +1773,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
1763 } 1773 }
1764 1774
1765 /* Userspace should not be able to register out-of-bounds IPAs */ 1775 /* Userspace should not be able to register out-of-bounds IPAs */
1766 VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); 1776 VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
1767 1777
1768 if (fault_status == FSC_ACCESS) { 1778 if (fault_status == FSC_ACCESS) {
1769 handle_access_fault(vcpu, fault_ipa); 1779 handle_access_fault(vcpu, fault_ipa);
@@ -2062,7 +2072,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
2062 * space addressable by the KVM guest IPA space. 2072 * space addressable by the KVM guest IPA space.
2063 */ 2073 */
2064 if (memslot->base_gfn + memslot->npages >= 2074 if (memslot->base_gfn + memslot->npages >=
2065 (KVM_PHYS_SIZE >> PAGE_SHIFT)) 2075 (kvm_phys_size(kvm) >> PAGE_SHIFT))
2066 return -EFAULT; 2076 return -EFAULT;
2067 2077
2068 down_read(&current->mm->mmap_sem); 2078 down_read(&current->mm->mmap_sem);
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 12502251727e..eb2a390a6c86 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
241 list_for_each_entry(dev, &(its)->device_list, dev_list) \ 241 list_for_each_entry(dev, &(its)->device_list, dev_list) \
242 list_for_each_entry(ite, &(dev)->itt_head, ite_list) 242 list_for_each_entry(ite, &(dev)->itt_head, ite_list)
243 243
244/*
245 * We only implement 48 bits of PA at the moment, although the ITS
246 * supports more. Let's be restrictive here.
247 */
248#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16))
249#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12))
250
251#define GIC_LPI_OFFSET 8192 244#define GIC_LPI_OFFSET 8192
252 245
253#define VITS_TYPER_IDBITS 16 246#define VITS_TYPER_IDBITS 16
@@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
759{ 752{
760 int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; 753 int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
761 u64 indirect_ptr, type = GITS_BASER_TYPE(baser); 754 u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
755 phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
762 int esz = GITS_BASER_ENTRY_SIZE(baser); 756 int esz = GITS_BASER_ENTRY_SIZE(baser);
763 int index; 757 int index;
764 gfn_t gfn; 758 gfn_t gfn;
@@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
783 if (id >= (l1_tbl_size / esz)) 777 if (id >= (l1_tbl_size / esz))
784 return false; 778 return false;
785 779
786 addr = BASER_ADDRESS(baser) + id * esz; 780 addr = base + id * esz;
787 gfn = addr >> PAGE_SHIFT; 781 gfn = addr >> PAGE_SHIFT;
788 782
789 if (eaddr) 783 if (eaddr)
@@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
798 792
799 /* Each 1st level entry is represented by a 64-bit value. */ 793 /* Each 1st level entry is represented by a 64-bit value. */
800 if (kvm_read_guest_lock(its->dev->kvm, 794 if (kvm_read_guest_lock(its->dev->kvm,
801 BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), 795 base + index * sizeof(indirect_ptr),
802 &indirect_ptr, sizeof(indirect_ptr))) 796 &indirect_ptr, sizeof(indirect_ptr)))
803 return false; 797 return false;
804 798
@@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
808 if (!(indirect_ptr & BIT_ULL(63))) 802 if (!(indirect_ptr & BIT_ULL(63)))
809 return false; 803 return false;
810 804
811 /* 805 /* Mask the guest physical address and calculate the frame number. */
812 * Mask the guest physical address and calculate the frame number.
813 * Any address beyond our supported 48 bits of PA will be caught
814 * by the actual check in the final step.
815 */
816 indirect_ptr &= GENMASK_ULL(51, 16); 806 indirect_ptr &= GENMASK_ULL(51, 16);
817 807
818 /* Find the address of the actual entry */ 808 /* Find the address of the actual entry */
@@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg)
1304 GITS_BASER_OUTER_CACHEABILITY_SHIFT, 1294 GITS_BASER_OUTER_CACHEABILITY_SHIFT,
1305 vgic_sanitise_outer_cacheability); 1295 vgic_sanitise_outer_cacheability);
1306 1296
1307 /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */
1308 reg &= ~GENMASK_ULL(15, 12);
1309
1310 /* We support only one (ITS) page size: 64K */ 1297 /* We support only one (ITS) page size: 64K */
1311 reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; 1298 reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
1312 1299
@@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg)
1325 GITS_CBASER_OUTER_CACHEABILITY_SHIFT, 1312 GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
1326 vgic_sanitise_outer_cacheability); 1313 vgic_sanitise_outer_cacheability);
1327 1314
1328 /* 1315 /* Sanitise the physical address to be 64k aligned. */
1329 * Sanitise the physical address to be 64k aligned. 1316 reg &= ~GENMASK_ULL(15, 12);
1330 * Also limit the physical addresses to 48 bits.
1331 */
1332 reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12));
1333 1317
1334 return reg; 1318 return reg;
1335} 1319}
@@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
1375 if (!its->enabled) 1359 if (!its->enabled)
1376 return; 1360 return;
1377 1361
1378 cbaser = CBASER_ADDRESS(its->cbaser); 1362 cbaser = GITS_CBASER_ADDRESS(its->cbaser);
1379 1363
1380 while (its->cwriter != its->creadr) { 1364 while (its->cwriter != its->creadr) {
1381 int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, 1365 int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
@@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
2233 if (!(baser & GITS_BASER_VALID)) 2217 if (!(baser & GITS_BASER_VALID))
2234 return 0; 2218 return 0;
2235 2219
2236 l1_gpa = BASER_ADDRESS(baser); 2220 l1_gpa = GITS_BASER_ADDR_48_to_52(baser);
2237 2221
2238 if (baser & GITS_BASER_INDIRECT) { 2222 if (baser & GITS_BASER_INDIRECT) {
2239 l1_esz = GITS_LVL1_ENTRY_SIZE; 2223 l1_esz = GITS_LVL1_ENTRY_SIZE;
@@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
2305{ 2289{
2306 const struct vgic_its_abi *abi = vgic_its_get_abi(its); 2290 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2307 u64 baser = its->baser_coll_table; 2291 u64 baser = its->baser_coll_table;
2308 gpa_t gpa = BASER_ADDRESS(baser); 2292 gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser);
2309 struct its_collection *collection; 2293 struct its_collection *collection;
2310 u64 val; 2294 u64 val;
2311 size_t max_size, filled = 0; 2295 size_t max_size, filled = 0;
@@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
2354 if (!(baser & GITS_BASER_VALID)) 2338 if (!(baser & GITS_BASER_VALID))
2355 return 0; 2339 return 0;
2356 2340
2357 gpa = BASER_ADDRESS(baser); 2341 gpa = GITS_BASER_ADDR_48_to_52(baser);
2358 2342
2359 max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; 2343 max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
2360 2344
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 6ada2432e37c..114dce9f4bf5 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -25,7 +25,7 @@
25int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, 25int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
26 phys_addr_t addr, phys_addr_t alignment) 26 phys_addr_t addr, phys_addr_t alignment)
27{ 27{
28 if (addr & ~KVM_PHYS_MASK) 28 if (addr & ~kvm_phys_mask(kvm))
29 return -E2BIG; 29 return -E2BIG;
30 30
31 if (!IS_ALIGNED(addr, alignment)) 31 if (!IS_ALIGNED(addr, alignment))
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index a2a175b08b17..b3d1f0985117 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg)
364 vgic_sanitise_outer_cacheability); 364 vgic_sanitise_outer_cacheability);
365 365
366 reg &= ~PENDBASER_RES0_MASK; 366 reg &= ~PENDBASER_RES0_MASK;
367 reg &= ~GENMASK_ULL(51, 48);
368 367
369 return reg; 368 return reg;
370} 369}
@@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg)
382 vgic_sanitise_outer_cacheability); 381 vgic_sanitise_outer_cacheability);
383 382
384 reg &= ~PROPBASER_RES0_MASK; 383 reg &= ~PROPBASER_RES0_MASK;
385 reg &= ~GENMASK_ULL(51, 48);
386 return reg; 384 return reg;
387} 385}
388 386