author     Paolo Bonzini <pbonzini@redhat.com>   2018-10-19 09:24:24 -0400
committer  Paolo Bonzini <pbonzini@redhat.com>   2018-10-19 09:24:24 -0400
commit     e42b4a507efa19a90c63e7968c93c4f82d3bc805 (patch)
tree       cc7bf34323b26d3f53d4a9ec4ca1b5c52bc361cd
parent     1e58e5e59148916fa43444a406335a990783fb78 (diff)
parent     e4e11cc0f81ee7be17d6f6fb96128a6d51c0e838 (diff)
Merge tag 'kvmarm-for-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm updates for 4.20
- Improved guest IPA space support (32 to 52 bits)
- RAS event delivery for 32bit
- PMU fixes
- Guest entry hardening
- Various cleanups
65 files changed, 1118 insertions, 565 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 07e87a7c665d..cd209f7730af 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -123,6 +123,37 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
 flag KVM_VM_MIPS_VZ.
 
 
+On arm64, the physical address size for a VM (IPA Size limit) is limited
+to 40 bits by default. The limit can be configured if the host supports the
+extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
+KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
+identifier, where IPA_Bits is the maximum width of any physical
+address used by the VM. IPA_Bits is encoded in bits [7-0] of the
+machine type identifier.
+
+e.g., to configure a guest to use a 48-bit physical address size:
+
+    vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
+
+The requested size (IPA_Bits) must be:
+  0 - Implies default size, 40 bits (for backward compatibility)
+
+  or
+
+  N - Implies N bits, where N is a positive integer such that
+      32 <= N <= Host_IPA_Limit
+
+Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
+depends on the CPU capability and the kernel configuration. The limit can
+be retrieved using KVM_CAP_ARM_VM_IPA_SIZE with the KVM_CHECK_EXTENSION
+ioctl() at run-time.
+
+Please note that configuring the IPA size does not affect the capability
+exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
+the size of the address translated at stage 2 (guest physical to
+host physical address translations).
+
+
 4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
 
 Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
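Putting the two ioctls from the new documentation together, a minimal userspace
sketch looks like this (illustrative only: it assumes uapi headers that already
define KVM_CAP_ARM_VM_IPA_SIZE and KVM_VM_TYPE_ARM_IPA_SIZE(); the helper name
is ours and error handling is trimmed):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int create_vm_with_large_ipa(void)
    {
            int dev_fd = open("/dev/kvm", O_RDWR);
            /* returns Host_IPA_Limit, or 0 if the extension is absent */
            int ipa_max = ioctl(dev_fd, KVM_CHECK_EXTENSION,
                                KVM_CAP_ARM_VM_IPA_SIZE);

            if (ipa_max >= 48)      /* host supports a 48-bit guest PA */
                    return ioctl(dev_fd, KVM_CREATE_VM,
                                 KVM_VM_TYPE_ARM_IPA_SIZE(48));

            /* fall back to the default 40-bit IPA space */
            return ioctl(dev_fd, KVM_CREATE_VM, 0);
    }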
diff --git a/MAINTAINERS b/MAINTAINERS
index 1610fb26bdac..86e019c7b0fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12260,6 +12260,7 @@ F: Documentation/networking/rds.txt
 
 RDT - RESOURCE ALLOCATION
 M:	Fenghua Yu <fenghua.yu@intel.com>
+M:	Reinette Chatre <reinette.chatre@intel.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	arch/x86/kernel/cpu/intel_rdt*
@@ -15924,6 +15925,7 @@ F: net/x25/
 X86 ARCHITECTURE (32-BIT AND 64-BIT)
 M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Ingo Molnar <mingo@redhat.com>
+M:	Borislav Petkov <bp@alien8.de>
 R:	"H. Peter Anvin" <hpa@zytor.com>
 M:	x86@kernel.org
 L:	linux-kernel@vger.kernel.org
@@ -15952,6 +15954,15 @@ M: Borislav Petkov <bp@alien8.de>
 S:	Maintained
 F:	arch/x86/kernel/cpu/microcode/*
 
+X86 MM
+M:	Dave Hansen <dave.hansen@linux.intel.com>
+M:	Andy Lutomirski <luto@kernel.org>
+M:	Peter Zijlstra <peterz@infradead.org>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
+S:	Maintained
+F:	arch/x86/mm/
+
 X86 PLATFORM DRIVERS
 M:	Darren Hart <dvhart@infradead.org>
 M:	Andy Shevchenko <andy@infradead.org>
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Merciless Moray
 
 # *DOCUMENTATION*
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 3ab8b3781bfe..c3f1f9b304b7 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -133,8 +133,7 @@
  * space.
  */
 #define KVM_PHYS_SHIFT	(40)
-#define KVM_PHYS_SIZE	(_AC(1, ULL) << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - _AC(1, ULL))
+
 #define PTRS_PER_S2_PGD	(_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
 
 /* Virtualization Translation Control Register (VTCR) bits */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3ad482d2f1eb..5ca5d9af0c26 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void)
 	kvm_call_hyp(__init_stage2_translation);
 }
 
-static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	return 0;
 }
@@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
 
+static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+	/*
+	 * On 32bit ARM, VMs get a static 40bit IPA stage2 setup,
+	 * so any non-zero value used as type is illegal.
+	 */
+	if (type)
+		return -EINVAL;
+	return 0;
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 265ea9cf7df7..5ad1a54f98dc 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -35,16 +35,12 @@
 	addr;							\
 })
 
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
- */
-#define KVM_MMU_CACHE_MIN_PAGES	2
-
 #ifndef __ASSEMBLY__
 
 #include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/kvm_arm.h>
 #include <asm/kvm_hyp.h>
 #include <asm/pgalloc.h>
 #include <asm/stage2_pgtable.h>
@@ -52,6 +48,13 @@
 /* Ensure compatibility with arm64 */
 #define VA_BITS			32
 
+#define kvm_phys_shift(kvm)		KVM_PHYS_SHIFT
+#define kvm_phys_size(kvm)		(1ULL << kvm_phys_shift(kvm))
+#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - 1ULL)
+#define kvm_vttbr_baddr_mask(kvm)	VTTBR_BADDR_MASK
+
+#define stage2_pgd_size(kvm)		(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
 int create_hyp_mappings(void *from, void *to, pgprot_t prot);
 int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
 			   void __iomem **kaddr,
@@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void)
 
 #define kvm_phys_to_vttbr(addr)		(addr)
 
+static inline void kvm_set_ipa_limit(void) {}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index 460d616bb2d6..f6a7ea805232 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -19,43 +19,53 @@
 #ifndef __ARM_S2_PGTABLE_H_
 #define __ARM_S2_PGTABLE_H_
 
-#define stage2_pgd_none(pgd)			pgd_none(pgd)
-#define stage2_pgd_clear(pgd)			pgd_clear(pgd)
-#define stage2_pgd_present(pgd)			pgd_present(pgd)
-#define stage2_pgd_populate(pgd, pud)		pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(pgd, address)		pud_offset(pgd, address)
-#define stage2_pud_free(pud)			pud_free(NULL, pud)
+/*
+ * kvm_mmu_cache_min_pages() is the number of pages required
+ * to install a stage-2 translation. We pre-allocate the entry
+ * level table at VM creation. Since we have a 3 level page-table,
+ * we need only two pages to add a new mapping.
+ */
+#define kvm_mmu_cache_min_pages(kvm)		2
 
-#define stage2_pud_none(pud)			pud_none(pud)
-#define stage2_pud_clear(pud)			pud_clear(pud)
-#define stage2_pud_present(pud)			pud_present(pud)
-#define stage2_pud_populate(pud, pmd)		pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(pud, address)		pmd_offset(pud, address)
-#define stage2_pmd_free(pmd)			pmd_free(NULL, pmd)
+#define stage2_pgd_none(kvm, pgd)		pgd_none(pgd)
+#define stage2_pgd_clear(kvm, pgd)		pgd_clear(pgd)
+#define stage2_pgd_present(kvm, pgd)		pgd_present(pgd)
+#define stage2_pgd_populate(kvm, pgd, pud)	pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(kvm, pgd, address)	pud_offset(pgd, address)
+#define stage2_pud_free(kvm, pud)		pud_free(NULL, pud)
 
-#define stage2_pud_huge(pud)			pud_huge(pud)
+#define stage2_pud_none(kvm, pud)		pud_none(pud)
+#define stage2_pud_clear(kvm, pud)		pud_clear(pud)
+#define stage2_pud_present(kvm, pud)		pud_present(pud)
+#define stage2_pud_populate(kvm, pud, pmd)	pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(kvm, pud, address)	pmd_offset(pud, address)
+#define stage2_pmd_free(kvm, pmd)		pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(kvm, pud)		pud_huge(pud)
 
 /* Open coded p*d_addr_end that can deal with 64bit addresses */
-static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
 
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
 
-#define stage2_pud_addr_end(addr, end)		(end)
+#define stage2_pud_addr_end(kvm, addr, end)	(end)
 
-static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
 
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
 
-#define stage2_pgd_index(addr)			pgd_index(addr)
+#define stage2_pgd_index(kvm, addr)		pgd_index(addr)
 
-#define stage2_pte_table_empty(ptep)		kvm_page_empty(ptep)
-#define stage2_pmd_table_empty(pmdp)		kvm_page_empty(pmdp)
-#define stage2_pud_table_empty(pudp)		false
+#define stage2_pte_table_empty(kvm, ptep)	kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(kvm, pmdp)	kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(kvm, pudp)	false
 
 #endif	/* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1717ba1db35d..072cc1c970c2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -530,6 +530,26 @@ void arm64_set_ssbd_mitigation(bool state);
 static inline void arm64_set_ssbd_mitigation(bool state) {}
 #endif
 
+static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
+{
+	switch (parange) {
+	case 0: return 32;
+	case 1: return 36;
+	case 2: return 40;
+	case 3: return 42;
+	case 4: return 44;
+	case 5: return 48;
+	case 6: return 52;
+	/*
+	 * A future PE could use a value unknown to the kernel.
+	 * However, by the "D10.1.4 Principles of the ID scheme
+	 * for fields in ID registers", ARM DDI 0487C.a, any new
+	 * value is guaranteed to be higher than what we know already.
+	 * As a safe limit, we return the limit supported by the kernel.
+	 */
+	default: return CONFIG_ARM64_PA_BITS;
+	}
+}
 #endif /* __ASSEMBLY__ */
 
 #endif
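For context, this is roughly how the new helper gets consumed when deriving
Host_IPA_Limit from the CPU's ID register (a sketch modelled on what
kvm_set_ipa_limit() in this series does, not a verbatim copy;
read_sanitised_ftr_reg() and cpuid_feature_extract_unsigned_field() are
pre-existing arm64 helpers):

    u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
    int parange = cpuid_feature_extract_unsigned_field(mmfr0,
                                ID_AA64MMFR0_PARANGE_SHIFT);
    /* Host_IPA_Limit, as later reported via KVM_CAP_ARM_VM_IPA_SIZE */
    u32 host_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);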
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index aa45df752a16..6e324d1f1231 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
 #define VTCR_EL2_RES1		(1 << 31)
 #define VTCR_EL2_HD		(1 << 22)
 #define VTCR_EL2_HA		(1 << 21)
+#define VTCR_EL2_PS_SHIFT	TCR_EL2_PS_SHIFT
 #define VTCR_EL2_PS_MASK	TCR_EL2_PS_MASK
 #define VTCR_EL2_TG0_MASK	TCR_TG0_MASK
 #define VTCR_EL2_TG0_4K		TCR_TG0_4K
@@ -120,62 +121,149 @@
 #define VTCR_EL2_IRGN0_WBWA	TCR_IRGN0_WBWA
 #define VTCR_EL2_SL0_SHIFT	6
 #define VTCR_EL2_SL0_MASK	(3 << VTCR_EL2_SL0_SHIFT)
-#define VTCR_EL2_SL0_LVL1	(1 << VTCR_EL2_SL0_SHIFT)
 #define VTCR_EL2_T0SZ_MASK	0x3f
-#define VTCR_EL2_T0SZ_40B	24
 #define VTCR_EL2_VS_SHIFT	19
 #define VTCR_EL2_VS_8BIT	(0 << VTCR_EL2_VS_SHIFT)
 #define VTCR_EL2_VS_16BIT	(1 << VTCR_EL2_VS_SHIFT)
 
+#define VTCR_EL2_T0SZ(x)	TCR_T0SZ(x)
+
 /*
  * We configure the Stage-2 page tables to always restrict the IPA space to be
  * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
- * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
- * (see hyp-init.S).
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
  *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together. With 16K pages, we concatenate 16 first level page tables.
  *
- * The magic numbers used for VTTBR_X in this patch can be found in Tables
- * D4-23 and D4-25 in ARM DDI 0487A.b.
  */
 
-#define VTCR_EL2_T0SZ_IPA	VTCR_EL2_T0SZ_40B
 #define VTCR_EL2_COMMON_BITS	(VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
 				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
 
-#ifdef CONFIG_ARM64_64K_PAGES
 /*
- * Stage2 translation configuration:
- * 64kB pages (TG0 = 1)
- * 2 level page tables (SL = 1)
+ * VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
+ * Interestingly, it depends on the page size.
+ * See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
+ *
+ *	-----------------------------------------
+ *	| Entry level		|  4K  | 16K/64K |
+ *	------------------------------------------
+ *	| Level: 0		|  2   |    -    |
+ *	------------------------------------------
+ *	| Level: 1		|  1   |    2    |
+ *	------------------------------------------
+ *	| Level: 2		|  0   |    1    |
+ *	------------------------------------------
+ *	| Level: 3		|  -   |    0    |
+ *	------------------------------------------
+ *
+ * The table roughly translates to :
+ *
+ *	SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
+ *
+ * Where TGRAN_SL0_BASE is a magic number depending on the page size:
+ *	TGRAN_SL0_BASE(4K) = 2
+ *	TGRAN_SL0_BASE(16K) = 3
+ *	TGRAN_SL0_BASE(64K) = 3
+ * provided we take care of ruling out the unsupported cases and
+ * Entry_Level = 4 - Number_of_levels.
+ *
  */
-#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC		38
+#ifdef CONFIG_ARM64_64K_PAGES
+
+#define VTCR_EL2_TGRAN			VTCR_EL2_TG0_64K
+#define VTCR_EL2_TGRAN_SL0_BASE		3UL
+
 #elif defined(CONFIG_ARM64_16K_PAGES)
-/*
- * Stage2 translation configuration:
- * 16kB pages (TG0 = 2)
- * 2 level page tables (SL = 1)
- */
-#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC		42
+
+#define VTCR_EL2_TGRAN			VTCR_EL2_TG0_16K
+#define VTCR_EL2_TGRAN_SL0_BASE		3UL
+
 #else	/* 4K */
-/*
- * Stage2 translation configuration:
- * 4kB pages (TG0 = 0)
- * 3 level page tables (SL = 1)
- */
-#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC		37
+
+#define VTCR_EL2_TGRAN			VTCR_EL2_TG0_4K
+#define VTCR_EL2_TGRAN_SL0_BASE		2UL
+
 #endif
 
-#define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
-#define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+#define VTCR_EL2_LVLS_TO_SL0(levels)	\
+	((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_TO_LVLS(sl0)	\
+	((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
+#define VTCR_EL2_LVLS(vtcr)		\
+	VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
+
+#define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
+#define VTCR_EL2_IPA(vtcr)		(64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
+
+/*
+ * ARM VMSAv8-64 defines an algorithm for finding the translation table
+ * descriptors in section D4.2.8 in ARM DDI 0487C.a.
+ *
+ * The algorithm defines the expectations on the translation table
+ * addresses for each level, based on PAGE_SIZE, entry level
+ * and the translation table size (T0SZ). The variable "x" in the
+ * algorithm determines the alignment of a table base address at a given
+ * level and thus determines the alignment of VTTBR:BADDR for stage2
+ * page table entry level.
+ * Since the number of bits resolved at the entry level could vary
+ * depending on the T0SZ, the value of "x" is defined based on a
+ * Magic constant for a given PAGE_SIZE and Entry Level. The
+ * intermediate levels must be always aligned to the PAGE_SIZE (i.e,
+ * x = PAGE_SHIFT).
+ *
+ * The value of "x" for entry level is calculated as :
+ *	x = Magic_N - T0SZ
+ *
+ * where Magic_N is an integer depending on the page size and the entry
+ * level of the page table as below:
+ *
+ *	--------------------------------------------
+ *	| Entry level		|  4K    16K   64K |
+ *	--------------------------------------------
+ *	| Level: 0 (4 levels)	| 28   |  -  |  -  |
+ *	--------------------------------------------
+ *	| Level: 1 (3 levels)	| 37   | 31  | 25  |
+ *	--------------------------------------------
+ *	| Level: 2 (2 levels)	| 46   | 42  | 38  |
+ *	--------------------------------------------
+ *	| Level: 3 (1 level)	| -    | 53  | 51  |
+ *	--------------------------------------------
+ *
+ * We have a magic formula for the Magic_N below:
+ *
+ *	Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
+ *
+ * where Number_of_levels = (4 - Level). We are only interested in the
+ * value for Entry_Level for the stage2 page table.
+ *
+ * So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
+ *
+ *	x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
+ *	  = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number_of_levels)
+ *
+ * Here is one way to explain the Magic Formula:
+ *
+ *	x = log2(Size_of_Entry_Level_Table)
+ *
+ * Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
+ * PAGE_SHIFT bits in the PTE, we have :
+ *
+ *	Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
+ *			 = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
+ * where n = number of levels, and since each pointer is 8bytes, we have:
+ *
+ *	x = Bits_Entry_Level + 3
+ *	  = IPA_SHIFT - (PAGE_SHIFT - 3) * n
+ *
+ * The only constraint here is that, we have to find the number of page table
+ * levels for a given IPA size (which we do, see stage2_pt_levels())
+ */
+#define ARM64_VTTBR_X(ipa, levels)	((ipa) - ((levels) * (PAGE_SHIFT - 3)))
 
-#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
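To make the two tables above concrete, here is a small self-contained check of
the arithmetic for 4K pages and a 40-bit IPA (a standalone sketch, not kernel
code; the constants mirror the macros in the hunk above):

    #include <assert.h>

    #define PAGE_SHIFT          12      /* 4K pages */
    #define TGRAN_SL0_BASE      2       /* the 4K entry in the SL0 table */

    int main(void)
    {
            int ipa = 40;
            /* stage2 levels needed for (IPA - 4), cf. stage2_pgtable_levels() */
            int levels = ((ipa - 4) - 4) / (PAGE_SHIFT - 3);    /* = 3 */
            int sl0 = TGRAN_SL0_BASE - (4 - levels);            /* SL0 = 1 */
            int t0sz = 64 - ipa;                                /* T0SZ = 24 */
            int x = ipa - levels * (PAGE_SHIFT - 3);            /* ARM64_VTTBR_X = 13 */

            assert(levels == 3 && sl0 == 1 && t0sz == 24 && x == 13);
            /*
             * x = 13 agrees with the old static scheme (Magic_N(4K, L1) = 37,
             * 37 - 24 = 13): the entry level table is 2^13 bytes, i.e. two
             * concatenated 4K pages, exactly as the comment above says.
             */
            return 0;
    }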
@@ -223,6 +311,13 @@
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
+/*
+ * We have
+ *	PAR	[PA_Shift - 1	: 12] = PA	[PA_Shift - 1 : 12]
+ *	HPFAR	[PA_Shift - 9	: 4]  = FIPA	[PA_Shift - 1 : 12]
+ */
+#define PAR_TO_HPFAR(par)		\
+	(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
 
 #define kvm_arm_exception_type	\
 	{0, "IRQ" }, \
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 102b5a5c47b6..aea01a09eb94 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,6 +30,7 @@
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_EL1_SERROR  1
 #define ARM_EXCEPTION_TRAP	  2
+#define ARM_EXCEPTION_IL	  3
 /* The hyp-stub will return this for any kvm_call_hyp() call */
 #define ARM_EXCEPTION_HYP_GONE	  HVC_STUB_ERR
 
@@ -72,8 +73,6 @@ extern void __vgic_v3_init_lrs(void);
 
 extern u32 __kvm_get_mdcr_el2(void);
 
-extern u32 __init_stage2_translation(void);
-
 /* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
 #define __hyp_this_cpu_ptr(sym)						\
 	({								\
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3d6d7336f871..f84052f306af 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
+int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
 struct kvm_arch {
@@ -61,11 +61,13 @@ struct kvm_arch {
 	u64    vmid_gen;
 	u32    vmid;
 
-	/* 1-level 2nd stage table, protected by kvm->mmu_lock */
+	/* stage2 entry level table */
 	pgd_t *pgd;
 
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
+	/* VTCR_EL2 value for this VM */
+	u64    vtcr;
 
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
@@ -440,13 +442,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
-static inline void __cpu_init_stage2(void)
-{
-	u32 parange = kvm_call_hyp(__init_stage2_translation);
-
-	WARN_ONCE(parange < 40,
-		  "PARange is %d bits, unsupported configuration!", parange);
-}
+static inline void __cpu_init_stage2(void) {}
 
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -509,8 +505,12 @@ static inline int kvm_arm_have_ssbd(void)
 void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
 
+void kvm_set_ipa_limit(void);
+
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
 
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 384c34397619..23aca66767f9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -155,5 +155,15 @@ void deactivate_traps_vhe_put(void);
 u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
 void __noreturn __hyp_do_panic(unsigned long, ...);
 
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
+{
+	write_sysreg(kvm->arch.vtcr, vtcr_el2);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
 #endif /* __ARM64_KVM_HYP_H__ */
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index d6fff7de5539..77b1af9e64db 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -141,8 +141,16 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
  * We currently only support a 40bit IPA.
  */
 #define KVM_PHYS_SHIFT	(40)
-#define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
+
+#define kvm_phys_shift(kvm)		VTCR_EL2_IPA(kvm->arch.vtcr)
+#define kvm_phys_size(kvm)		(_AC(1, ULL) << kvm_phys_shift(kvm))
+#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
+
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
 
 #include <asm/stage2_pgtable.h>
 
@@ -238,12 +246,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
 	return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
 }
 
-static inline bool kvm_page_empty(void *ptr)
-{
-	struct page *ptr_page = virt_to_page(ptr);
-	return page_count(ptr_page) == 1;
-}
-
 #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
 
 #ifdef __PAGETABLE_PMD_FOLDED
@@ -517,5 +519,29 @@ static inline int hyp_map_aux_data(void)
 
 #define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
 
+/*
+ * Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
+ * With v8.2 LVA extensions, 'x' should be a minimum of 6 with
+ * 52bit IPS.
+ */
+static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
+{
+	int x = ARM64_VTTBR_X(ipa_shift, levels);
+
+	return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
+}
+
+static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
+{
+	unsigned int x = arm64_vttbr_x(ipa_shift, levels);
+
+	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
+}
+
+static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
+{
+	return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 177b851ca6d9..ff35ac1258eb 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -25,6 +25,9 @@
 #define CurrentEL_EL1		(1 << 2)
 #define CurrentEL_EL2		(2 << 2)
 
+/* Additional SPSR bits not exposed in the UABI */
+#define PSR_IL_BIT		(1 << 20)
+
 /* AArch32-specific ptrace requests */
 #define COMPAT_PTRACE_GETREGS	12
 #define COMPAT_PTRACE_SETREGS	13
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
deleted file mode 100644
index 2656a0fd05a6..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
-#define __ARM64_S2_PGTABLE_NOPMD_H_
-
-#include <asm/stage2_pgtable-nopud.h>
-
-#define __S2_PGTABLE_PMD_FOLDED
-
-#define S2_PMD_SHIFT		S2_PUD_SHIFT
-#define S2_PTRS_PER_PMD		1
-#define S2_PMD_SIZE		(1UL << S2_PMD_SHIFT)
-#define S2_PMD_MASK		(~(S2_PMD_SIZE-1))
-
-#define stage2_pud_none(pud)			(0)
-#define stage2_pud_present(pud)			(1)
-#define stage2_pud_clear(pud)			do { } while (0)
-#define stage2_pud_populate(pud, pmd)		do { } while (0)
-#define stage2_pmd_offset(pud, address)		((pmd_t *)(pud))
-
-#define stage2_pmd_free(pmd)			do { } while (0)
-
-#define stage2_pmd_addr_end(addr, end)		(end)
-
-#define stage2_pud_huge(pud)			(0)
-#define stage2_pmd_table_empty(pmdp)		(0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
deleted file mode 100644
index 5ee87b54ebf3..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopud.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
-#define __ARM64_S2_PGTABLE_NOPUD_H_
-
-#define __S2_PGTABLE_PUD_FOLDED
-
-#define S2_PUD_SHIFT		S2_PGDIR_SHIFT
-#define S2_PTRS_PER_PUD		1
-#define S2_PUD_SIZE		(_AC(1, UL) << S2_PUD_SHIFT)
-#define S2_PUD_MASK		(~(S2_PUD_SIZE-1))
-
-#define stage2_pgd_none(pgd)			(0)
-#define stage2_pgd_present(pgd)			(1)
-#define stage2_pgd_clear(pgd)			do { } while (0)
-#define stage2_pgd_populate(pgd, pud)		do { } while (0)
-
-#define stage2_pud_offset(pgd, address)		((pud_t *)(pgd))
-
-#define stage2_pud_free(x)			do { } while (0)
-
-#define stage2_pud_addr_end(addr, end)		(end)
-#define stage2_pud_table_empty(pmdp)		(0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 8b68099348e5..d352f6df8d2c 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -19,9 +19,17 @@
 #ifndef __ARM64_S2_PGTABLE_H_
 #define __ARM64_S2_PGTABLE_H_
 
+#include <linux/hugetlb.h>
 #include <asm/pgtable.h>
 
 /*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * and depends on the number of levels in the page table. Compute the
+ * PGDIR_SHIFT for a given number of levels.
+ */
+#define pt_levels_pgdir_shift(lvls)	ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
+
+/*
  * The hardware supports concatenation of up to 16 tables at stage2 entry level
  * and we use the feature whenever possible.
  *
@@ -29,112 +37,208 @@
  * On arm64, the smallest PAGE_SIZE supported is 4k, which means
  * (PAGE_SHIFT - 3) > 4 holds for all page sizes.
  * This implies, the total number of page table levels at stage2 expected
- * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
  * in normal translations(e.g, stage1), since we cannot have another level in
- * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ * the range (IPA_SHIFT, IPA_SHIFT - 4).
  */
-#define STAGE2_PGTABLE_LEVELS	ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+#define stage2_pgtable_levels(ipa)	ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
+#define kvm_stage2_levels(kvm)		VTCR_EL2_LVLS(kvm->arch.vtcr)
 
-/*
- * With all the supported VA_BITs and 40bit guest IPA, the following condition
- * is always true:
- *
- *       STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
- *
- * We base our stage-2 page table walker helpers on this assumption and
- * fall back to using the host version of the helper wherever possible.
- * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
- * to using the host version, since it is guaranteed it is not folded at host.
- *
- * If the condition breaks in the future, we can rearrange the host level
- * definitions and reuse them for stage2. Till then...
- */
-#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
-#error "Unsupported combination of guest IPA and host VA_BITS."
-#endif
-
-/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
-#define S2_PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
-#define S2_PGDIR_SIZE		(_AC(1, UL) << S2_PGDIR_SHIFT)
-#define S2_PGDIR_MASK		(~(S2_PGDIR_SIZE - 1))
+/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
+#define stage2_pgdir_shift(kvm)		pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
+#define stage2_pgdir_size(kvm)		(1ULL << stage2_pgdir_shift(kvm))
+#define stage2_pgdir_mask(kvm)		~(stage2_pgdir_size(kvm) - 1)
 
 /*
  * The number of PTRS across all concatenated stage2 tables given by the
  * number of bits resolved at the initial level.
+ * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
+ * in which case, stage2_pgd_ptrs will have one entry.
  */
-#define PTRS_PER_S2_PGD		(1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+#define pgd_ptrs_shift(ipa, pgdir_shift)	\
+	((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
+#define __s2_pgd_ptrs(ipa, lvls)		\
+	(1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
+#define __s2_pgd_size(ipa, lvls)	(__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
+
+#define stage2_pgd_ptrs(kvm)	__s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
+#define stage2_pgd_size(kvm)	__s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
 
 /*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD.
+ * kvm_mmu_cache_min_pages() is the number of pages required to install
+ * a stage-2 translation. We pre-allocate the entry level page table at
+ * the VM creation.
  */
-#define KVM_MMU_CACHE_MIN_PAGES	(STAGE2_PGTABLE_LEVELS - 1)
-
+#define kvm_mmu_cache_min_pages(kvm)	(kvm_stage2_levels(kvm) - 1)
 
-#if STAGE2_PGTABLE_LEVELS > 3
+/* Stage2 PUD definitions when the level is present */
+static inline bool kvm_stage2_has_pud(struct kvm *kvm)
+{
+	return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3);
+}
 
 #define S2_PUD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
-#define S2_PUD_SIZE			(_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_SIZE			(1UL << S2_PUD_SHIFT)
 #define S2_PUD_MASK			(~(S2_PUD_SIZE - 1))
 
-#define stage2_pgd_none(pgd)			pgd_none(pgd)
-#define stage2_pgd_clear(pgd)			pgd_clear(pgd)
-#define stage2_pgd_present(pgd)			pgd_present(pgd)
-#define stage2_pgd_populate(pgd, pud)		pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(pgd, address)		pud_offset(pgd, address)
-#define stage2_pud_free(pud)			pud_free(NULL, pud)
+static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
+{
+	if (kvm_stage2_has_pud(kvm))
+		return pgd_none(pgd);
+	else
+		return 0;
+}
 
-#define stage2_pud_table_empty(pudp)		kvm_page_empty(pudp)
+static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
+{
+	if (kvm_stage2_has_pud(kvm))
+		pgd_clear(pgdp);
+}
 
-static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
 {
-	phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+	if (kvm_stage2_has_pud(kvm))
+		return pgd_present(pgd);
+	else
+		return 1;
+}
 
-	return (boundary - 1 < end - 1) ? boundary : end;
+static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
+{
+	if (kvm_stage2_has_pud(kvm))
+		pgd_populate(NULL, pgd, pud);
+}
+
+static inline pud_t *stage2_pud_offset(struct kvm *kvm,
+				       pgd_t *pgd, unsigned long address)
+{
+	if (kvm_stage2_has_pud(kvm))
+		return pud_offset(pgd, address);
+	else
+		return (pud_t *)pgd;
 }
 
-#endif /* STAGE2_PGTABLE_LEVELS > 3 */
+static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
+{
+	if (kvm_stage2_has_pud(kvm))
+		pud_free(NULL, pud);
+}
 
+static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
+{
+	if (kvm_stage2_has_pud(kvm))
+		return kvm_page_empty(pudp);
+	else
+		return false;
+}
 
-#if STAGE2_PGTABLE_LEVELS > 2
+static inline phys_addr_t
+stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	if (kvm_stage2_has_pud(kvm)) {
+		phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+
+		return (boundary - 1 < end - 1) ? boundary : end;
+	} else {
+		return end;
+	}
+}
+
+/* Stage2 PMD definitions when the level is present */
+static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
+{
+	return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2);
+}
 
 #define S2_PMD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
-#define S2_PMD_SIZE			(_AC(1, UL) << S2_PMD_SHIFT)
+#define S2_PMD_SIZE			(1UL << S2_PMD_SHIFT)
 #define S2_PMD_MASK			(~(S2_PMD_SIZE - 1))
 
-#define stage2_pud_none(pud)			pud_none(pud)
-#define stage2_pud_clear(pud)			pud_clear(pud)
-#define stage2_pud_present(pud)			pud_present(pud)
-#define stage2_pud_populate(pud, pmd)		pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(pud, address)		pmd_offset(pud, address)
-#define stage2_pmd_free(pmd)			pmd_free(NULL, pmd)
+static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pud_none(pud);
+	else
+		return 0;
+}
+
+static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		pud_clear(pud);
+}
 
-#define stage2_pud_huge(pud)			pud_huge(pud)
-#define stage2_pmd_table_empty(pmdp)		kvm_page_empty(pmdp)
+static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pud_present(pud);
+	else
+		return 1;
+}
 
-static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
 {
-	phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+	if (kvm_stage2_has_pmd(kvm))
+		pud_populate(NULL, pud, pmd);
+}
 
-	return (boundary - 1 < end - 1) ? boundary : end;
+static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
+				       pud_t *pud, unsigned long address)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pmd_offset(pud, address);
+	else
+		return (pmd_t *)pud;
 }
 
-#endif /* STAGE2_PGTABLE_LEVELS > 2 */
+static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		pmd_free(NULL, pmd);
+}
+
+static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pud_huge(pud);
+	else
+		return 0;
+}
+
+static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return kvm_page_empty(pmdp);
+	else
+		return 0;
+}
 
-#define stage2_pte_table_empty(ptep)		kvm_page_empty(ptep)
+static inline phys_addr_t
+stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	if (kvm_stage2_has_pmd(kvm)) {
+		phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
 
-#if STAGE2_PGTABLE_LEVELS == 2
-#include <asm/stage2_pgtable-nopmd.h>
-#elif STAGE2_PGTABLE_LEVELS == 3
-#include <asm/stage2_pgtable-nopud.h>
-#endif
+		return (boundary - 1 < end - 1) ? boundary : end;
+	} else {
+		return end;
+	}
+}
 
+static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
+{
+	return kvm_page_empty(ptep);
+}
 
-#define stage2_pgd_index(addr)	(((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
+{
+	return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1));
+}
 
-static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 {
-	phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+	phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
 
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
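To see the entry-level sizing macros above in action, here is a standalone
sketch for 4K pages (the LEVEL_SHIFT constant is re-derived locally from the
kernel's ARM64_HW_PGTABLE_LEVEL_SHIFT() definition; pgd entries are 8 bytes):

    #include <assert.h>

    #define PAGE_SHIFT 12
    /* ARM64_HW_PGTABLE_LEVEL_SHIFT(n) for 4K pages */
    #define LEVEL_SHIFT(n)          (((PAGE_SHIFT - 3) * (4 - (n))) + 3)
    #define PGDIR_SHIFT(lvls)       LEVEL_SHIFT(4 - (lvls))
    #define S2_PGD_PTRS(ipa, lvls) \
            (1UL << ((ipa) > PGDIR_SHIFT(lvls) ? (ipa) - PGDIR_SHIFT(lvls) : 0))

    int main(void)
    {
            /* 40-bit IPA, 3 levels: pgdir_shift = 30, 1024 entries,
             * i.e. an 8K pgd made of two concatenated 4K pages. */
            assert(S2_PGD_PTRS(40, 3) * 8 == 2 * 4096);

            /* 48-bit IPA, 4 levels: pgdir_shift = 39, 512 entries,
             * i.e. the pgd fits in a single 4K page. */
            assert(S2_PGD_PTRS(48, 4) * 8 == 4096);
            return 0;
    }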
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 07256b08226c..a74f84d09412 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -338,15 +338,15 @@ int __attribute_const__ kvm_target_cpu(void)
 			return KVM_ARM_TARGET_CORTEX_A53;
 		case ARM_CPU_PART_CORTEX_A57:
 			return KVM_ARM_TARGET_CORTEX_A57;
-		};
+		}
 		break;
 	case ARM_CPU_IMP_APM:
 		switch (part_number) {
 		case APM_CPU_PART_POTENZA:
 			return KVM_ARM_TARGET_XGENE_POTENZA;
-		};
+		}
 		break;
-	};
+	}
 
 	/* Return a default generic target */
 	return KVM_ARM_TARGET_GENERIC_V8;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e5e741bfffe1..35a81bebd02b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 */
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		return 0;
+	case ARM_EXCEPTION_IL:
+		/*
+		 * We attempted an illegal exception return. Guest state must
+		 * have been corrupted somehow. Give up.
+		 */
+		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		return -EINVAL;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
 			      exception_index);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 2fabc2dc1966..82d1904328ad 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
 obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
 obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
 
 # KVM code is run at a different exception code with a different map, so
 # compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 24b4fbafe3e4..b1f14f736962 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S | |||
@@ -162,6 +162,20 @@ el1_error: | |||
162 | mov x0, #ARM_EXCEPTION_EL1_SERROR | 162 | mov x0, #ARM_EXCEPTION_EL1_SERROR |
163 | b __guest_exit | 163 | b __guest_exit |
164 | 164 | ||
165 | el2_sync: | ||
166 | /* Check for illegal exception return, otherwise panic */ | ||
167 | mrs x0, spsr_el2 | ||
168 | |||
169 | /* if this was something else, then panic! */ | ||
170 | tst x0, #PSR_IL_BIT | ||
171 | b.eq __hyp_panic | ||
172 | |||
173 | /* Let's attempt a recovery from the illegal exception return */ | ||
174 | get_vcpu_ptr x1, x0 | ||
175 | mov x0, #ARM_EXCEPTION_IL | ||
176 | b __guest_exit | ||
177 | |||
178 | |||
165 | el2_error: | 179 | el2_error: |
166 | ldp x0, x1, [sp], #16 | 180 | ldp x0, x1, [sp], #16 |
167 | 181 | ||
@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector) | |||
240 | invalid_vect el2t_fiq_invalid // FIQ EL2t | 254 | invalid_vect el2t_fiq_invalid // FIQ EL2t |
241 | invalid_vect el2t_error_invalid // Error EL2t | 255 | invalid_vect el2t_error_invalid // Error EL2t |
242 | 256 | ||
243 | invalid_vect el2h_sync_invalid // Synchronous EL2h | 257 | valid_vect el2_sync // Synchronous EL2h |
244 | invalid_vect el2h_irq_invalid // IRQ EL2h | 258 | invalid_vect el2h_irq_invalid // IRQ EL2h |
245 | invalid_vect el2h_fiq_invalid // FIQ EL2h | 259 | invalid_vect el2h_fiq_invalid // FIQ EL2h |
246 | valid_vect el2_error // Error EL2h | 260 | valid_vect el2_error // Error EL2h |
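The vector table change routes synchronous EL2h exceptions into el2_sync, which keys entirely off PSTATE.IL. The test restated in C; the bit position is the architectural SPSR_ELx.IL field (bit 20), mirrored here as an assumed constant:

    #include <stdbool.h>
    #include <stdint.h>

    #define PSR_IL_BIT (1UL << 20)  /* SPSR_ELx.IL: illegal execution state */

    /*
     * Mirrors el2_sync: recover via __guest_exit only when the exception was
     * caused by an illegal exception return; anything else is a hyp panic.
     */
    static bool should_recover(uint64_t spsr_el2)
    {
            return (spsr_el2 & PSR_IL_BIT) != 0;
    }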
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c deleted file mode 100644 index 603e1ee83e89..000000000000 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <asm/kvm_arm.h> | ||
20 | #include <asm/kvm_asm.h> | ||
21 | #include <asm/kvm_hyp.h> | ||
22 | |||
23 | u32 __hyp_text __init_stage2_translation(void) | ||
24 | { | ||
25 | u64 val = VTCR_EL2_FLAGS; | ||
26 | u64 parange; | ||
27 | u64 tmp; | ||
28 | |||
29 | /* | ||
30 | * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS | ||
31 | * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while | ||
32 | * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... | ||
33 | */ | ||
34 | parange = read_sysreg(id_aa64mmfr0_el1) & 7; | ||
35 | if (parange > ID_AA64MMFR0_PARANGE_MAX) | ||
36 | parange = ID_AA64MMFR0_PARANGE_MAX; | ||
37 | val |= parange << 16; | ||
38 | |||
39 | /* Compute the actual PARange... */ | ||
40 | switch (parange) { | ||
41 | case 0: | ||
42 | parange = 32; | ||
43 | break; | ||
44 | case 1: | ||
45 | parange = 36; | ||
46 | break; | ||
47 | case 2: | ||
48 | parange = 40; | ||
49 | break; | ||
50 | case 3: | ||
51 | parange = 42; | ||
52 | break; | ||
53 | case 4: | ||
54 | parange = 44; | ||
55 | break; | ||
56 | case 5: | ||
57 | default: | ||
58 | parange = 48; | ||
59 | break; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * ... and clamp it to 40 bits, unless we have some braindead | ||
64 | * HW that implements less than that. In all cases, we'll | ||
65 | * return that value for the rest of the kernel to decide what | ||
66 | * to do. | ||
67 | */ | ||
68 | val |= 64 - (parange > 40 ? 40 : parange); | ||
69 | |||
70 | /* | ||
71 | * Check the availability of Hardware Access Flag / Dirty Bit | ||
72 | * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. | ||
73 | */ | ||
74 | tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; | ||
75 | if (tmp) | ||
76 | val |= VTCR_EL2_HA; | ||
77 | |||
78 | /* | ||
79 | * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS | ||
80 | * bit in VTCR_EL2. | ||
81 | */ | ||
82 | tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf; | ||
83 | val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ? | ||
84 | VTCR_EL2_VS_16BIT : | ||
85 | VTCR_EL2_VS_8BIT; | ||
86 | |||
87 | write_sysreg(val, vtcr_el2); | ||
88 | |||
89 | return parange; | ||
90 | } | ||
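The PARange decode that lived in the switch above is not lost; the reset.c changes below call id_aa64mmfr0_parange_to_phys_shift() instead. A sketch of such a helper, consistent with the deleted table; the 52-bit entry (PARange 6, ARMv8.2) is an assumption of this sketch, not shown in the removed file:

    /* Sketch of id_aa64mmfr0_parange_to_phys_shift(); cases 0-5 match the
     * switch in the deleted s2-setup.c, 6 is assumed per ARMv8.2-LPA. */
    static inline unsigned int parange_to_phys_shift(unsigned int parange)
    {
            switch (parange) {
            case 0: return 32;
            case 1: return 36;
            case 2: return 40;
            case 3: return 42;
            case 4: return 44;
            case 5: return 48;
            case 6: return 52;
            default: return 48;     /* the old code also fell back to 48 */
            }
    }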
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ca46153d7915..7cc175c88a37 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c | |||
@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void) | |||
198 | 198 | ||
199 | static void __hyp_text __activate_vm(struct kvm *kvm) | 199 | static void __hyp_text __activate_vm(struct kvm *kvm) |
200 | { | 200 | { |
201 | write_sysreg(kvm->arch.vttbr, vttbr_el2); | 201 | __load_guest_stage2(kvm); |
202 | } | 202 | } |
203 | 203 | ||
204 | static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) | 204 | static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) |
@@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) | |||
263 | return false; /* Translation failed, back to guest */ | 263 | return false; /* Translation failed, back to guest */ |
264 | 264 | ||
265 | /* Convert PAR to HPFAR format */ | 265 | /* Convert PAR to HPFAR format */ |
266 | *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4; | 266 | *hpfar = PAR_TO_HPFAR(tmp); |
267 | return true; | 267 | return true; |
268 | } | 268 | } |
269 | 269 | ||
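PAR_TO_HPFAR replaces the open-coded conversion on the left-hand side. Judging from that expression, the macro plausibly reduces to the same bit shuffle; the mask below is taken from the old code and may be wider in the new header to accommodate 52-bit PAs:

    /* Sketch: place PA[47:12] of the faulting address at HPFAR_EL2[39:4].
     * Equivalent to the removed ((tmp >> 12) & ((1UL << 36) - 1)) << 4. */
    #define PAR_TO_HPFAR(par) \
            ((((par) >> 12) & ((1UL << 36) - 1)) << 4)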
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..8dc285318204 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c | |||
@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) | |||
152 | static void __hyp_text | 152 | static void __hyp_text |
153 | __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) | 153 | __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) |
154 | { | 154 | { |
155 | u64 pstate = ctxt->gp_regs.regs.pstate; | ||
156 | u64 mode = pstate & PSR_AA32_MODE_MASK; | ||
157 | |||
158 | /* | ||
159 | * Safety check to ensure we're setting the CPU up to enter the guest | ||
160 | * in a less privileged mode. | ||
161 | * | ||
162 | * If we are attempting a return to EL2 or higher in AArch64 state, | ||
163 | * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that | ||
164 | * we'll take an illegal exception state exception immediately after | ||
165 | * the ERET to the guest. Attempts to return to AArch32 Hyp will | ||
166 | * result in an illegal exception return because EL2's execution state | ||
167 | * is determined by SCR_EL3.RW. | ||
168 | */ | ||
169 | if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) | ||
170 | pstate = PSR_MODE_EL2h | PSR_IL_BIT; | ||
171 | |||
155 | write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); | 172 | write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); |
156 | write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); | 173 | write_sysreg_el2(pstate, spsr); |
157 | 174 | ||
158 | if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) | 175 | if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) |
159 | write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); | 176 | write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); |
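The mode check above is dense; restated as a standalone predicate. The PSR_* values quoted are the usual arm64 PSTATE.M encodings (EL2t = 0x8, EL2h = 0x9) and are assumptions of this sketch:

    #include <stdbool.h>
    #include <stdint.h>

    #define PSR_MODE_EL2t      0x8UL
    #define PSR_MODE_EL2h      0x9UL
    #define PSR_MODE32_BIT     0x10UL
    #define PSR_AA32_MODE_MASK 0x1fUL
    #define PSR_IL_BIT         (1UL << 20)

    /* True if the saved pstate would ERET to EL2 or above in AArch64 state,
     * i.e. to a mode at least as privileged as the hypervisor itself. */
    static bool eret_escalates(uint64_t pstate)
    {
            uint64_t mode = pstate & PSR_AA32_MODE_MASK;

            return !(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t;
    }

    /* On escalation, substitute EL2h with IL set, forcing an immediate
     * illegal exception state exception right after the ERET. */
    static uint64_t sanitize_pstate(uint64_t pstate)
    {
            return eret_escalates(pstate) ? (PSR_MODE_EL2h | PSR_IL_BIT)
                                          : pstate;
    }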
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 131c7772703c..4dbd9c69a96d 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c | |||
@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) | |||
30 | * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so | 30 | * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so |
31 | * let's flip TGE before executing the TLB operation. | 31 | * let's flip TGE before executing the TLB operation. |
32 | */ | 32 | */ |
33 | write_sysreg(kvm->arch.vttbr, vttbr_el2); | 33 | __load_guest_stage2(kvm); |
34 | val = read_sysreg(hcr_el2); | 34 | val = read_sysreg(hcr_el2); |
35 | val &= ~HCR_TGE; | 35 | val &= ~HCR_TGE; |
36 | write_sysreg(val, hcr_el2); | 36 | write_sysreg(val, hcr_el2); |
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) | |||
39 | 39 | ||
40 | static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) | 40 | static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) |
41 | { | 41 | { |
42 | write_sysreg(kvm->arch.vttbr, vttbr_el2); | 42 | __load_guest_stage2(kvm); |
43 | isb(); | 43 | isb(); |
44 | } | 44 | } |
45 | 45 | ||
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e37c78bbe1ca..b72a3dd56204 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <kvm/arm_arch_timer.h> | 27 | #include <kvm/arm_arch_timer.h> |
28 | 28 | ||
29 | #include <asm/cpufeature.h> | ||
29 | #include <asm/cputype.h> | 30 | #include <asm/cputype.h> |
30 | #include <asm/ptrace.h> | 31 | #include <asm/ptrace.h> |
31 | #include <asm/kvm_arm.h> | 32 | #include <asm/kvm_arm.h> |
@@ -33,6 +34,9 @@ | |||
33 | #include <asm/kvm_coproc.h> | 34 | #include <asm/kvm_coproc.h> |
34 | #include <asm/kvm_mmu.h> | 35 | #include <asm/kvm_mmu.h> |
35 | 36 | ||
37 | /* Maximum phys_shift supported for any VM on this host */ | ||
38 | static u32 kvm_ipa_limit; | ||
39 | |||
36 | /* | 40 | /* |
37 | * ARMv8 Reset Values | 41 | * ARMv8 Reset Values |
38 | */ | 42 | */ |
@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void) | |||
55 | } | 59 | } |
56 | 60 | ||
57 | /** | 61 | /** |
58 | * kvm_arch_dev_ioctl_check_extension | 62 | * kvm_arch_vm_ioctl_check_extension |
59 | * | 63 | * |
60 | * We currently assume that the number of HW registers is uniform | 64 | * We currently assume that the number of HW registers is uniform |
61 | * across all CPUs (see cpuinfo_sanity_check). | 65 | * across all CPUs (see cpuinfo_sanity_check). |
62 | */ | 66 | */ |
63 | int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) | 67 | int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) |
64 | { | 68 | { |
65 | int r; | 69 | int r; |
66 | 70 | ||
@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) | |||
82 | break; | 86 | break; |
83 | case KVM_CAP_SET_GUEST_DEBUG: | 87 | case KVM_CAP_SET_GUEST_DEBUG: |
84 | case KVM_CAP_VCPU_ATTRIBUTES: | 88 | case KVM_CAP_VCPU_ATTRIBUTES: |
85 | case KVM_CAP_VCPU_EVENTS: | ||
86 | r = 1; | 89 | r = 1; |
87 | break; | 90 | break; |
91 | case KVM_CAP_ARM_VM_IPA_SIZE: | ||
92 | r = kvm_ipa_limit; | ||
93 | break; | ||
88 | default: | 94 | default: |
89 | r = 0; | 95 | r = 0; |
90 | } | 96 | } |
@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
133 | /* Reset timer */ | 139 | /* Reset timer */ |
134 | return kvm_timer_vcpu_reset(vcpu); | 140 | return kvm_timer_vcpu_reset(vcpu); |
135 | } | 141 | } |
142 | |||
143 | void kvm_set_ipa_limit(void) | ||
144 | { | ||
145 | unsigned int ipa_max, pa_max, va_max, parange; | ||
146 | |||
147 | parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7; | ||
148 | pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); | ||
149 | |||
150 | /* Clamp the IPA limit to the PA size supported by the kernel */ | ||
151 | ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max; | ||
152 | /* | ||
153 | * Since our stage2 table is dependent on the stage1 page table code, | ||
154 | * we must always honor the following condition: | ||
155 | * | ||
156 | * Number of levels in Stage1 >= Number of levels in Stage2. | ||
157 | * | ||
158 | * So clamp the ipa limit further down to limit the number of levels. | ||
159 | * Since we can concatenate up to 16 tables at the entry level, we | ||
160 | * can go up to 4 bits above the maximum VA addressable with the | ||
161 | * current number of levels. | ||
162 | */ | ||
163 | va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; | ||
164 | va_max += 4; | ||
165 | |||
166 | if (va_max < ipa_max) | ||
167 | ipa_max = va_max; | ||
168 | |||
169 | /* | ||
170 | * If the final limit is lower than the real physical address | ||
171 | * limit of the CPUs, report the reason. | ||
172 | */ | ||
173 | if (ipa_max < pa_max) | ||
174 | pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n", | ||
175 | (va_max < pa_max) ? "Virtual" : "Physical"); | ||
176 | |||
177 | WARN(ipa_max < KVM_PHYS_SHIFT, | ||
178 | "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); | ||
179 | kvm_ipa_limit = ipa_max; | ||
180 | kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * Configure the VTCR_EL2 for this VM. The VTCR value is common | ||
185 | * across all the physical CPUs on the system. We use system wide | ||
186 | * sanitised values to fill in different fields, except for Hardware | ||
187 | * Management of Access Flags. HA Flag is set unconditionally on | ||
188 | * all CPUs, as it is safe to run with or without the feature and | ||
189 | * the bit is RES0 on CPUs that don't support it. | ||
190 | */ | ||
191 | int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) | ||
192 | { | ||
193 | u64 vtcr = VTCR_EL2_FLAGS; | ||
194 | u32 parange, phys_shift; | ||
195 | u8 lvls; | ||
196 | |||
197 | if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) | ||
198 | return -EINVAL; | ||
199 | |||
200 | phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); | ||
201 | if (phys_shift) { | ||
202 | if (phys_shift > kvm_ipa_limit || | ||
203 | phys_shift < 32) | ||
204 | return -EINVAL; | ||
205 | } else { | ||
206 | phys_shift = KVM_PHYS_SHIFT; | ||
207 | } | ||
208 | |||
209 | parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; | ||
210 | if (parange > ID_AA64MMFR0_PARANGE_MAX) | ||
211 | parange = ID_AA64MMFR0_PARANGE_MAX; | ||
212 | vtcr |= parange << VTCR_EL2_PS_SHIFT; | ||
213 | |||
214 | vtcr |= VTCR_EL2_T0SZ(phys_shift); | ||
215 | /* | ||
216 | * Use a minimum 2 level page table to prevent splitting | ||
217 | * host PMD huge pages at stage2. | ||
218 | */ | ||
219 | lvls = stage2_pgtable_levels(phys_shift); | ||
220 | if (lvls < 2) | ||
221 | lvls = 2; | ||
222 | vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); | ||
223 | |||
224 | /* | ||
225 | * Enable the Hardware Access Flag management, unconditionally | ||
226 | * on all CPUs. The feature is RES0 on CPUs that do not support | ||
227 | * it and must be ignored by them. | ||
228 | */ | ||
229 | vtcr |= VTCR_EL2_HA; | ||
230 | |||
231 | /* Set the vmid bits */ | ||
232 | vtcr |= (kvm_get_vmid_bits() == 16) ? | ||
233 | VTCR_EL2_VS_16BIT : | ||
234 | VTCR_EL2_VS_8BIT; | ||
235 | kvm->arch.vtcr = vtcr; | ||
236 | return 0; | ||
237 | } | ||
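To make the clamping in kvm_set_ipa_limit() and the level selection in kvm_arm_setup_stage2() concrete, here is a worked sketch for one sample configuration (4K pages, 48-bit VA, 52-bit PA kernel). The level-count formula is the generic arm64 expression and the "- 4" reflects the 16-way table concatenation described above; both are assumptions of the sketch:

    #include <stdio.h>

    #define PAGE_SHIFT      12   /* 4K pages */
    #define PGDIR_SHIFT     39   /* 48-bit VA, 4 levels */
    #define PHYS_MASK_SHIFT 52   /* kernel built with 52-bit PA support */

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
    /* Assumed generic arm64 level count for a given address width. */
    #define HW_PGTABLE_LEVELS(bits) \
            DIV_ROUND_UP((bits) - PAGE_SHIFT, PAGE_SHIFT - 3)
    /* Concatenating up to 16 tables at entry level buys 4 bits for free. */
    #define stage2_pgtable_levels(ipa) HW_PGTABLE_LEVELS((ipa) - 4)

    int main(void)
    {
            unsigned int pa_max = 52;  /* sample CPU PARange */
            unsigned int va_max = PGDIR_SHIFT + PAGE_SHIFT - 3 + 4;  /* 52 */
            unsigned int ipa_max = pa_max;

            if (ipa_max > PHYS_MASK_SHIFT)
                    ipa_max = PHYS_MASK_SHIFT;
            if (ipa_max > va_max)
                    ipa_max = va_max;

            printf("IPA limit: %u bits\n", ipa_max);                       /* 52 */
            printf("levels @40-bit IPA: %d\n", stage2_pgtable_levels(40)); /* 3 */
            printf("levels @52-bit IPA: %d\n", stage2_pgtable_levels(52)); /* 4 */
            return 0;
    }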
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e203169931c7..6390bd8c141b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -14,6 +14,16 @@ | |||
14 | #ifndef _ASM_X86_FIXMAP_H | 14 | #ifndef _ASM_X86_FIXMAP_H |
15 | #define _ASM_X86_FIXMAP_H | 15 | #define _ASM_X86_FIXMAP_H |
16 | 16 | ||
17 | /* | ||
18 | * Exposed to assembly code for setting up initial page tables. Cannot be | ||
19 | * calculated in assembly code (fixmap entries are an enum), but is sanity | ||
20 | * checked in the actual fixmap C code to make sure that the fixmap is | ||
21 | * covered fully. | ||
22 | */ | ||
23 | #define FIXMAP_PMD_NUM 2 | ||
24 | /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ | ||
25 | #define FIXMAP_PMD_TOP 507 | ||
26 | |||
17 | #ifndef __ASSEMBLY__ | 27 | #ifndef __ASSEMBLY__ |
18 | #include <linux/kernel.h> | 28 | #include <linux/kernel.h> |
19 | #include <asm/acpi.h> | 29 | #include <asm/acpi.h> |
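FIXMAP_PMD_NUM and FIXMAP_PMD_TOP together pin down how much of the fixmap the static tables cover. The arithmetic, as a sketch with the usual 4K-page constants:

    #define PAGE_SIZE      4096UL
    #define PTRS_PER_PTE   512
    #define FIXMAP_PMD_NUM 2
    #define FIXMAP_PMD_TOP 507

    /* Two PMD slots, each mapping one page table's worth of fixmap:
     * 2 * 512 * 4096 = 4 MiB, occupying level2 entries 506 and 507. */
    static const unsigned long fixmap_coverage =
            FIXMAP_PMD_NUM * PTRS_PER_PTE * PAGE_SIZE;  /* 0x400000 */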
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c0643831706e..616f8e637bc3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h | |||
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); | |||
48 | 48 | ||
49 | /* Architecture __weak replacement functions */ | 49 | /* Architecture __weak replacement functions */ |
50 | void __init mem_encrypt_init(void); | 50 | void __init mem_encrypt_init(void); |
51 | void __init mem_encrypt_free_decrypted_mem(void); | ||
51 | 52 | ||
52 | bool sme_active(void); | 53 | bool sme_active(void); |
53 | bool sev_active(void); | 54 | bool sev_active(void); |
54 | 55 | ||
56 | #define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) | ||
57 | |||
55 | #else /* !CONFIG_AMD_MEM_ENCRYPT */ | 58 | #else /* !CONFIG_AMD_MEM_ENCRYPT */ |
56 | 59 | ||
57 | #define sme_me_mask 0ULL | 60 | #define sme_me_mask 0ULL |
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; | |||
77 | static inline int __init | 80 | static inline int __init |
78 | early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } | 81 | early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } |
79 | 82 | ||
83 | #define __bss_decrypted | ||
84 | |||
80 | #endif /* CONFIG_AMD_MEM_ENCRYPT */ | 85 | #endif /* CONFIG_AMD_MEM_ENCRYPT */ |
81 | 86 | ||
82 | /* | 87 | /* |
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; | |||
88 | #define __sme_pa(x) (__pa(x) | sme_me_mask) | 93 | #define __sme_pa(x) (__pa(x) | sme_me_mask) |
89 | #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) | 94 | #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) |
90 | 95 | ||
96 | extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; | ||
97 | |||
91 | #endif /* __ASSEMBLY__ */ | 98 | #endif /* __ASSEMBLY__ */ |
92 | 99 | ||
93 | #endif /* __X86_MEM_ENCRYPT_H__ */ | 100 | #endif /* __X86_MEM_ENCRYPT_H__ */ |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ce2b59047cb8..9c85b54bf03c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
15 | #include <linux/bitops.h> | 15 | #include <linux/bitops.h> |
16 | #include <linux/threads.h> | 16 | #include <linux/threads.h> |
17 | #include <asm/fixmap.h> | ||
17 | 18 | ||
18 | extern p4d_t level4_kernel_pgt[512]; | 19 | extern p4d_t level4_kernel_pgt[512]; |
19 | extern p4d_t level4_ident_pgt[512]; | 20 | extern p4d_t level4_ident_pgt[512]; |
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512]; | |||
22 | extern pmd_t level2_kernel_pgt[512]; | 23 | extern pmd_t level2_kernel_pgt[512]; |
23 | extern pmd_t level2_fixmap_pgt[512]; | 24 | extern pmd_t level2_fixmap_pgt[512]; |
24 | extern pmd_t level2_ident_pgt[512]; | 25 | extern pmd_t level2_ident_pgt[512]; |
25 | extern pte_t level1_fixmap_pgt[512]; | 26 | extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; |
26 | extern pgd_t init_top_pgt[]; | 27 | extern pgd_t init_top_pgt[]; |
27 | 28 | ||
28 | #define swapper_pg_dir init_top_pgt | 29 | #define swapper_pg_dir init_top_pgt |
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 4e588f36228f..285eb3ec4200 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h | |||
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e) | |||
382 | e <= QOS_L3_MBM_LOCAL_EVENT_ID); | 382 | e <= QOS_L3_MBM_LOCAL_EVENT_ID); |
383 | } | 383 | } |
384 | 384 | ||
385 | struct rdt_parse_data { | ||
386 | struct rdtgroup *rdtgrp; | ||
387 | char *buf; | ||
388 | }; | ||
389 | |||
385 | /** | 390 | /** |
386 | * struct rdt_resource - attributes of an RDT resource | 391 | * struct rdt_resource - attributes of an RDT resource |
387 | * @rid: The index of the resource | 392 | * @rid: The index of the resource |
@@ -423,16 +428,19 @@ struct rdt_resource { | |||
423 | struct rdt_cache cache; | 428 | struct rdt_cache cache; |
424 | struct rdt_membw membw; | 429 | struct rdt_membw membw; |
425 | const char *format_str; | 430 | const char *format_str; |
426 | int (*parse_ctrlval) (void *data, struct rdt_resource *r, | 431 | int (*parse_ctrlval)(struct rdt_parse_data *data, |
427 | struct rdt_domain *d); | 432 | struct rdt_resource *r, |
433 | struct rdt_domain *d); | ||
428 | struct list_head evt_list; | 434 | struct list_head evt_list; |
429 | int num_rmid; | 435 | int num_rmid; |
430 | unsigned int mon_scale; | 436 | unsigned int mon_scale; |
431 | unsigned long fflags; | 437 | unsigned long fflags; |
432 | }; | 438 | }; |
433 | 439 | ||
434 | int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d); | 440 | int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, |
435 | int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d); | 441 | struct rdt_domain *d); |
442 | int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, | ||
443 | struct rdt_domain *d); | ||
436 | 444 | ||
437 | extern struct mutex rdtgroup_mutex; | 445 | extern struct mutex rdtgroup_mutex; |
438 | 446 | ||
@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); | |||
536 | void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); | 544 | void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); |
537 | struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); | 545 | struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); |
538 | int update_domains(struct rdt_resource *r, int closid); | 546 | int update_domains(struct rdt_resource *r, int closid); |
547 | int closids_supported(void); | ||
539 | void closid_free(int closid); | 548 | void closid_free(int closid); |
540 | int alloc_rmid(void); | 549 | int alloc_rmid(void); |
541 | void free_rmid(u32 rmid); | 550 | void free_rmid(u32 rmid); |
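With the void * cookie gone, a parse_ctrlval implementation sees the typed rdt_parse_data directly. A sketch of the callback shape under the new signature, modeled on parse_bw() below; validate() is a hypothetical stand-in for bw_validate()/cbm_validate():

    /* Sketch of a parse_ctrlval callback under the new typed signature. */
    static int parse_example(struct rdt_parse_data *data,
                             struct rdt_resource *r, struct rdt_domain *d)
    {
            u32 val;

            if (d->have_new_ctrl) {
                    rdt_last_cmd_printf("duplicate domain %d\n", d->id);
                    return -EINVAL;
            }

            /* validate() stands in for bw_validate()/cbm_validate() */
            if (!validate(data->buf, &val, r))
                    return -EINVAL;

            d->new_ctrl = val;
            d->have_new_ctrl = true;
            return 0;
    }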
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index af358ca05160..0f53049719cd 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | |||
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) | |||
64 | return true; | 64 | return true; |
65 | } | 65 | } |
66 | 66 | ||
67 | int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d) | 67 | int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, |
68 | struct rdt_domain *d) | ||
68 | { | 69 | { |
69 | unsigned long data; | 70 | unsigned long bw_val; |
70 | char *buf = _buf; | ||
71 | 71 | ||
72 | if (d->have_new_ctrl) { | 72 | if (d->have_new_ctrl) { |
73 | rdt_last_cmd_printf("duplicate domain %d\n", d->id); | 73 | rdt_last_cmd_printf("duplicate domain %d\n", d->id); |
74 | return -EINVAL; | 74 | return -EINVAL; |
75 | } | 75 | } |
76 | 76 | ||
77 | if (!bw_validate(buf, &data, r)) | 77 | if (!bw_validate(data->buf, &bw_val, r)) |
78 | return -EINVAL; | 78 | return -EINVAL; |
79 | d->new_ctrl = data; | 79 | d->new_ctrl = bw_val; |
80 | d->have_new_ctrl = true; | 80 | d->have_new_ctrl = true; |
81 | 81 | ||
82 | return 0; | 82 | return 0; |
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) | |||
123 | return true; | 123 | return true; |
124 | } | 124 | } |
125 | 125 | ||
126 | struct rdt_cbm_parse_data { | ||
127 | struct rdtgroup *rdtgrp; | ||
128 | char *buf; | ||
129 | }; | ||
130 | |||
131 | /* | 126 | /* |
132 | * Read one cache bit mask (hex). Check that it is valid for the current | 127 | * Read one cache bit mask (hex). Check that it is valid for the current |
133 | * resource type. | 128 | * resource type. |
134 | */ | 129 | */ |
135 | int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) | 130 | int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, |
131 | struct rdt_domain *d) | ||
136 | { | 132 | { |
137 | struct rdt_cbm_parse_data *data = _data; | ||
138 | struct rdtgroup *rdtgrp = data->rdtgrp; | 133 | struct rdtgroup *rdtgrp = data->rdtgrp; |
139 | u32 cbm_val; | 134 | u32 cbm_val; |
140 | 135 | ||
@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) | |||
195 | static int parse_line(char *line, struct rdt_resource *r, | 190 | static int parse_line(char *line, struct rdt_resource *r, |
196 | struct rdtgroup *rdtgrp) | 191 | struct rdtgroup *rdtgrp) |
197 | { | 192 | { |
198 | struct rdt_cbm_parse_data data; | 193 | struct rdt_parse_data data; |
199 | char *dom = NULL, *id; | 194 | char *dom = NULL, *id; |
200 | struct rdt_domain *d; | 195 | struct rdt_domain *d; |
201 | unsigned long dom_id; | 196 | unsigned long dom_id; |
202 | 197 | ||
198 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && | ||
199 | r->rid == RDT_RESOURCE_MBA) { | ||
200 | rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); | ||
201 | return -EINVAL; | ||
202 | } | ||
203 | |||
203 | next: | 204 | next: |
204 | if (!line || line[0] == '\0') | 205 | if (!line || line[0] == '\0') |
205 | return 0; | 206 | return 0; |
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index b799c00bef09..1b8e86a5d5e1 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | |||
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...) | |||
97 | * limited as the number of resources grows. | 97 | * limited as the number of resources grows. |
98 | */ | 98 | */ |
99 | static int closid_free_map; | 99 | static int closid_free_map; |
100 | static int closid_free_map_len; | ||
101 | |||
102 | int closids_supported(void) | ||
103 | { | ||
104 | return closid_free_map_len; | ||
105 | } | ||
100 | 106 | ||
101 | static void closid_init(void) | 107 | static void closid_init(void) |
102 | { | 108 | { |
@@ -111,6 +117,7 @@ static void closid_init(void) | |||
111 | 117 | ||
112 | /* CLOSID 0 is always reserved for the default group */ | 118 | /* CLOSID 0 is always reserved for the default group */ |
113 | closid_free_map &= ~1; | 119 | closid_free_map &= ~1; |
120 | closid_free_map_len = rdt_min_closid; | ||
114 | } | 121 | } |
115 | 122 | ||
116 | static int closid_alloc(void) | 123 | static int closid_alloc(void) |
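closid_free_map is a plain bitmap with CLOSID 0 permanently reserved, and closids_supported() simply remembers how many bits were ever put on the free list. A freestanding sketch of the same allocator; the names and the ffs()-based scan are illustrative:

    #include <strings.h>  /* ffs() */

    static int closid_free_map;
    static int closid_free_map_len;

    static void closid_init_sketch(int min_closid)
    {
            closid_free_map = (1 << min_closid) - 1;
            closid_free_map &= ~1;  /* CLOSID 0: always the default group */
            closid_free_map_len = min_closid;
    }

    static int closid_alloc_sketch(void)
    {
            int closid = ffs(closid_free_map);  /* 1-based, 0 if empty */

            if (closid == 0)
                    return -1;  /* out of CLOSIDs */
            closid--;
            closid_free_map &= ~(1 << closid);
            return closid;
    }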
@@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, | |||
802 | sw_shareable = 0; | 809 | sw_shareable = 0; |
803 | exclusive = 0; | 810 | exclusive = 0; |
804 | seq_printf(seq, "%d=", dom->id); | 811 | seq_printf(seq, "%d=", dom->id); |
805 | for (i = 0; i < r->num_closid; i++, ctrl++) { | 812 | for (i = 0; i < closids_supported(); i++, ctrl++) { |
806 | if (!closid_allocated(i)) | 813 | if (!closid_allocated(i)) |
807 | continue; | 814 | continue; |
808 | mode = rdtgroup_mode_by_closid(i); | 815 | mode = rdtgroup_mode_by_closid(i); |
@@ -989,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, | |||
989 | 996 | ||
990 | /* Check for overlap with other resource groups */ | 997 | /* Check for overlap with other resource groups */ |
991 | ctrl = d->ctrl_val; | 998 | ctrl = d->ctrl_val; |
992 | for (i = 0; i < r->num_closid; i++, ctrl++) { | 999 | for (i = 0; i < closids_supported(); i++, ctrl++) { |
993 | ctrl_b = (unsigned long *)ctrl; | 1000 | ctrl_b = (unsigned long *)ctrl; |
994 | mode = rdtgroup_mode_by_closid(i); | 1001 | mode = rdtgroup_mode_by_closid(i); |
995 | if (closid_allocated(i) && i != closid && | 1002 | if (closid_allocated(i) && i != closid && |
@@ -1024,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) | |||
1024 | { | 1031 | { |
1025 | int closid = rdtgrp->closid; | 1032 | int closid = rdtgrp->closid; |
1026 | struct rdt_resource *r; | 1033 | struct rdt_resource *r; |
1034 | bool has_cache = false; | ||
1027 | struct rdt_domain *d; | 1035 | struct rdt_domain *d; |
1028 | 1036 | ||
1029 | for_each_alloc_enabled_rdt_resource(r) { | 1037 | for_each_alloc_enabled_rdt_resource(r) { |
1038 | if (r->rid == RDT_RESOURCE_MBA) | ||
1039 | continue; | ||
1040 | has_cache = true; | ||
1030 | list_for_each_entry(d, &r->domains, list) { | 1041 | list_for_each_entry(d, &r->domains, list) { |
1031 | if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], | 1042 | if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], |
1032 | rdtgrp->closid, false)) | 1043 | rdtgrp->closid, false)) { |
1044 | rdt_last_cmd_puts("schemata overlaps\n"); | ||
1033 | return false; | 1045 | return false; |
1046 | } | ||
1034 | } | 1047 | } |
1035 | } | 1048 | } |
1036 | 1049 | ||
1050 | if (!has_cache) { | ||
1051 | rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n"); | ||
1052 | return false; | ||
1053 | } | ||
1054 | |||
1037 | return true; | 1055 | return true; |
1038 | } | 1056 | } |
1039 | 1057 | ||
@@ -1085,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, | |||
1085 | rdtgrp->mode = RDT_MODE_SHAREABLE; | 1103 | rdtgrp->mode = RDT_MODE_SHAREABLE; |
1086 | } else if (!strcmp(buf, "exclusive")) { | 1104 | } else if (!strcmp(buf, "exclusive")) { |
1087 | if (!rdtgroup_mode_test_exclusive(rdtgrp)) { | 1105 | if (!rdtgroup_mode_test_exclusive(rdtgrp)) { |
1088 | rdt_last_cmd_printf("schemata overlaps\n"); | ||
1089 | ret = -EINVAL; | 1106 | ret = -EINVAL; |
1090 | goto out; | 1107 | goto out; |
1091 | } | 1108 | } |
@@ -1155,8 +1172,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, | |||
1155 | struct rdt_resource *r; | 1172 | struct rdt_resource *r; |
1156 | struct rdt_domain *d; | 1173 | struct rdt_domain *d; |
1157 | unsigned int size; | 1174 | unsigned int size; |
1158 | bool sep = false; | 1175 | bool sep; |
1159 | u32 cbm; | 1176 | u32 ctrl; |
1160 | 1177 | ||
1161 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | 1178 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
1162 | if (!rdtgrp) { | 1179 | if (!rdtgrp) { |
@@ -1174,6 +1191,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, | |||
1174 | } | 1191 | } |
1175 | 1192 | ||
1176 | for_each_alloc_enabled_rdt_resource(r) { | 1193 | for_each_alloc_enabled_rdt_resource(r) { |
1194 | sep = false; | ||
1177 | seq_printf(s, "%*s:", max_name_width, r->name); | 1195 | seq_printf(s, "%*s:", max_name_width, r->name); |
1178 | list_for_each_entry(d, &r->domains, list) { | 1196 | list_for_each_entry(d, &r->domains, list) { |
1179 | if (sep) | 1197 | if (sep) |
@@ -1181,8 +1199,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, | |||
1181 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { | 1199 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { |
1182 | size = 0; | 1200 | size = 0; |
1183 | } else { | 1201 | } else { |
1184 | cbm = d->ctrl_val[rdtgrp->closid]; | 1202 | ctrl = (!is_mba_sc(r) ? |
1185 | size = rdtgroup_cbm_to_size(r, d, cbm); | 1203 | d->ctrl_val[rdtgrp->closid] : |
1204 | d->mbps_val[rdtgrp->closid]); | ||
1205 | if (r->rid == RDT_RESOURCE_MBA) | ||
1206 | size = ctrl; | ||
1207 | else | ||
1208 | size = rdtgroup_cbm_to_size(r, d, ctrl); | ||
1186 | } | 1209 | } |
1187 | seq_printf(s, "%d=%u", d->id, size); | 1210 | seq_printf(s, "%d=%u", d->id, size); |
1188 | sep = true; | 1211 | sep = true; |
@@ -2336,12 +2359,18 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) | |||
2336 | u32 *ctrl; | 2359 | u32 *ctrl; |
2337 | 2360 | ||
2338 | for_each_alloc_enabled_rdt_resource(r) { | 2361 | for_each_alloc_enabled_rdt_resource(r) { |
2362 | /* | ||
2363 | * Only initialize default allocations for CBM cache | ||
2364 | * resources | ||
2365 | */ | ||
2366 | if (r->rid == RDT_RESOURCE_MBA) | ||
2367 | continue; | ||
2339 | list_for_each_entry(d, &r->domains, list) { | 2368 | list_for_each_entry(d, &r->domains, list) { |
2340 | d->have_new_ctrl = false; | 2369 | d->have_new_ctrl = false; |
2341 | d->new_ctrl = r->cache.shareable_bits; | 2370 | d->new_ctrl = r->cache.shareable_bits; |
2342 | used_b = r->cache.shareable_bits; | 2371 | used_b = r->cache.shareable_bits; |
2343 | ctrl = d->ctrl_val; | 2372 | ctrl = d->ctrl_val; |
2344 | for (i = 0; i < r->num_closid; i++, ctrl++) { | 2373 | for (i = 0; i < closids_supported(); i++, ctrl++) { |
2345 | if (closid_allocated(i) && i != closid) { | 2374 | if (closid_allocated(i) && i != closid) { |
2346 | mode = rdtgroup_mode_by_closid(i); | 2375 | mode = rdtgroup_mode_by_closid(i); |
2347 | if (mode == RDT_MODE_PSEUDO_LOCKSETUP) | 2376 | if (mode == RDT_MODE_PSEUDO_LOCKSETUP) |
@@ -2373,6 +2402,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) | |||
2373 | } | 2402 | } |
2374 | 2403 | ||
2375 | for_each_alloc_enabled_rdt_resource(r) { | 2404 | for_each_alloc_enabled_rdt_resource(r) { |
2405 | /* | ||
2406 | * Only initialize default allocations for CBM cache | ||
2407 | * resources | ||
2408 | */ | ||
2409 | if (r->rid == RDT_RESOURCE_MBA) | ||
2410 | continue; | ||
2376 | ret = update_domains(r, rdtgrp->closid); | 2411 | ret = update_domains(r, rdtgrp->closid); |
2377 | if (ret < 0) { | 2412 | if (ret < 0) { |
2378 | rdt_last_cmd_puts("failed to initialize allocations\n"); | 2413 | rdt_last_cmd_puts("failed to initialize allocations\n"); |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379e575a..ddee1f0870c4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <asm/bootparam_utils.h> | 35 | #include <asm/bootparam_utils.h> |
36 | #include <asm/microcode.h> | 36 | #include <asm/microcode.h> |
37 | #include <asm/kasan.h> | 37 | #include <asm/kasan.h> |
38 | #include <asm/fixmap.h> | ||
38 | 39 | ||
39 | /* | 40 | /* |
40 | * Manage page tables very early on. | 41 | * Manage page tables very early on. |
@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr) | |||
112 | unsigned long __head __startup_64(unsigned long physaddr, | 113 | unsigned long __head __startup_64(unsigned long physaddr, |
113 | struct boot_params *bp) | 114 | struct boot_params *bp) |
114 | { | 115 | { |
116 | unsigned long vaddr, vaddr_end; | ||
115 | unsigned long load_delta, *p; | 117 | unsigned long load_delta, *p; |
116 | unsigned long pgtable_flags; | 118 | unsigned long pgtable_flags; |
117 | pgdval_t *pgd; | 119 | pgdval_t *pgd; |
@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
165 | pud[511] += load_delta; | 167 | pud[511] += load_delta; |
166 | 168 | ||
167 | pmd = fixup_pointer(level2_fixmap_pgt, physaddr); | 169 | pmd = fixup_pointer(level2_fixmap_pgt, physaddr); |
168 | pmd[506] += load_delta; | 170 | for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) |
171 | pmd[i] += load_delta; | ||
169 | 172 | ||
170 | /* | 173 | /* |
171 | * Set up the identity mapping for the switchover. These | 174 | * Set up the identity mapping for the switchover. These |
@@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
235 | sme_encrypt_kernel(bp); | 238 | sme_encrypt_kernel(bp); |
236 | 239 | ||
237 | /* | 240 | /* |
241 | * Clear the memory encryption mask from the .bss..decrypted section. | ||
242 | * The bss section will be memset to zero later in the initialization so | ||
243 | * there is no need to zero it after changing the memory encryption | ||
244 | * attribute. | ||
245 | */ | ||
246 | if (mem_encrypt_active()) { | ||
247 | vaddr = (unsigned long)__start_bss_decrypted; | ||
248 | vaddr_end = (unsigned long)__end_bss_decrypted; | ||
249 | for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { | ||
250 | i = pmd_index(vaddr); | ||
251 | pmd[i] -= sme_get_me_mask(); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | /* | ||
238 | * Return the SME encryption mask (if SME is active) to be used as a | 256 | * Return the SME encryption mask (if SME is active) to be used as a |
239 | * modifier for the initial pgdir entry programmed into CR3. | 257 | * modifier for the initial pgdir entry programmed into CR3. |
240 | */ | 258 | */ |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 15ebc2fc166e..a3618cf04cf6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "../entry/calling.h" | 24 | #include "../entry/calling.h" |
25 | #include <asm/export.h> | 25 | #include <asm/export.h> |
26 | #include <asm/nospec-branch.h> | 26 | #include <asm/nospec-branch.h> |
27 | #include <asm/fixmap.h> | ||
27 | 28 | ||
28 | #ifdef CONFIG_PARAVIRT | 29 | #ifdef CONFIG_PARAVIRT |
29 | #include <asm/asm-offsets.h> | 30 | #include <asm/asm-offsets.h> |
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt) | |||
445 | KERNEL_IMAGE_SIZE/PMD_SIZE) | 446 | KERNEL_IMAGE_SIZE/PMD_SIZE) |
446 | 447 | ||
447 | NEXT_PAGE(level2_fixmap_pgt) | 448 | NEXT_PAGE(level2_fixmap_pgt) |
448 | .fill 506,8,0 | 449 | .fill (512 - 4 - FIXMAP_PMD_NUM),8,0 |
449 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC | 450 | pgtno = 0 |
450 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | 451 | .rept (FIXMAP_PMD_NUM) |
451 | .fill 5,8,0 | 452 | .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \ |
453 | + _PAGE_TABLE_NOENC; | ||
454 | pgtno = pgtno + 1 | ||
455 | .endr | ||
456 | /* 6 MB reserved space + a 2MB hole */ | ||
457 | .fill 4,8,0 | ||
452 | 458 | ||
453 | NEXT_PAGE(level1_fixmap_pgt) | 459 | NEXT_PAGE(level1_fixmap_pgt) |
460 | .rept (FIXMAP_PMD_NUM) | ||
454 | .fill 512,8,0 | 461 | .fill 512,8,0 |
462 | .endr | ||
455 | 463 | ||
456 | #undef PMDS | 464 | #undef PMDS |
457 | 465 | ||
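The .rept block statically wires FIXMAP_PMD_NUM level-1 tables into the top of level2_fixmap_pgt. The index arithmetic checked in C, with entry counts read off the assembly's fills:

    #include <assert.h>

    #define FIXMAP_PMD_NUM 2
    #define FIXMAP_PMD_TOP 507

    int main(void)
    {
            /* .fill (512 - 4 - FIXMAP_PMD_NUM) leading zero entries ... */
            int first = 512 - 4 - FIXMAP_PMD_NUM;       /* 506 */
            /* ... then FIXMAP_PMD_NUM pointers into level1_fixmap_pgt ... */
            int last = first + FIXMAP_PMD_NUM - 1;      /* 507 */

            /* ... so the highest populated slot is exactly FIXMAP_PMD_TOP,
             * with 4 entries (6 MB reserved + a 2 MB hole) above it. */
            assert(last == FIXMAP_PMD_TOP);
            return 0;
    }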
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1e6764648af3..013fe3d21dbb 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/sched/clock.h> | 28 | #include <linux/sched/clock.h> |
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/set_memory.h> | ||
31 | 32 | ||
32 | #include <asm/hypervisor.h> | 33 | #include <asm/hypervisor.h> |
33 | #include <asm/mem_encrypt.h> | 34 | #include <asm/mem_encrypt.h> |
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); | |||
61 | (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) | 62 | (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) |
62 | 63 | ||
63 | static struct pvclock_vsyscall_time_info | 64 | static struct pvclock_vsyscall_time_info |
64 | hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); | 65 | hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); |
65 | static struct pvclock_wall_clock wall_clock; | 66 | static struct pvclock_wall_clock wall_clock __bss_decrypted; |
66 | static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); | 67 | static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); |
68 | static struct pvclock_vsyscall_time_info *hvclock_mem; | ||
67 | 69 | ||
68 | static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) | 70 | static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) |
69 | { | 71 | { |
@@ -236,6 +238,45 @@ static void kvm_shutdown(void) | |||
236 | native_machine_shutdown(); | 238 | native_machine_shutdown(); |
237 | } | 239 | } |
238 | 240 | ||
241 | static void __init kvmclock_init_mem(void) | ||
242 | { | ||
243 | unsigned long ncpus; | ||
244 | unsigned int order; | ||
245 | struct page *p; | ||
246 | int r; | ||
247 | |||
248 | if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus()) | ||
249 | return; | ||
250 | |||
251 | ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE; | ||
252 | order = get_order(ncpus * sizeof(*hvclock_mem)); | ||
253 | |||
254 | p = alloc_pages(GFP_KERNEL, order); | ||
255 | if (!p) { | ||
256 | pr_warn("%s: failed to alloc %d pages", __func__, (1U << order)); | ||
257 | return; | ||
258 | } | ||
259 | |||
260 | hvclock_mem = page_address(p); | ||
261 | |||
262 | /* | ||
263 | * hvclock is shared between the guest and the hypervisor, so it | ||
264 | * must be mapped decrypted. | ||
265 | */ | ||
266 | if (sev_active()) { | ||
267 | r = set_memory_decrypted((unsigned long) hvclock_mem, | ||
268 | 1UL << order); | ||
269 | if (r) { | ||
270 | __free_pages(p, order); | ||
271 | hvclock_mem = NULL; | ||
272 | pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n"); | ||
273 | return; | ||
274 | } | ||
275 | } | ||
276 | |||
277 | memset(hvclock_mem, 0, PAGE_SIZE << order); | ||
278 | } | ||
279 | |||
239 | static int __init kvm_setup_vsyscall_timeinfo(void) | 280 | static int __init kvm_setup_vsyscall_timeinfo(void) |
240 | { | 281 | { |
241 | #ifdef CONFIG_X86_64 | 282 | #ifdef CONFIG_X86_64 |
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void) | |||
250 | 291 | ||
251 | kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; | 292 | kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; |
252 | #endif | 293 | #endif |
294 | |||
295 | kvmclock_init_mem(); | ||
296 | |||
253 | return 0; | 297 | return 0; |
254 | } | 298 | } |
255 | early_initcall(kvm_setup_vsyscall_timeinfo); | 299 | early_initcall(kvm_setup_vsyscall_timeinfo); |
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu) | |||
269 | /* Use the static page for the first CPUs, allocate otherwise */ | 313 | /* Use the static page for the first CPUs, allocate otherwise */ |
270 | if (cpu < HVC_BOOT_ARRAY_SIZE) | 314 | if (cpu < HVC_BOOT_ARRAY_SIZE) |
271 | p = &hv_clock_boot[cpu]; | 315 | p = &hv_clock_boot[cpu]; |
316 | else if (hvclock_mem) | ||
317 | p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE; | ||
272 | else | 318 | else |
273 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 319 | return -ENOMEM; |
274 | 320 | ||
275 | per_cpu(hv_clock_per_cpu, cpu) = p; | 321 | per_cpu(hv_clock_per_cpu, cpu) = p; |
276 | return p ? 0 : -ENOMEM; | 322 | return p ? 0 : -ENOMEM; |
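kvmclock_init_mem() sizes one backing allocation for every possible CPU beyond the static boot array. A worked sketch of the sizing; sizeof(struct pvclock_vsyscall_time_info) is assumed to be 64 bytes here, which also puts HVC_BOOT_ARRAY_SIZE at 4096/64 = 64 slots:

    #include <stdio.h>

    #define PAGE_SIZE           4096UL
    #define PVTI_SIZE           64UL  /* assumed per-CPU record size */
    #define HVC_BOOT_ARRAY_SIZE (PAGE_SIZE / PVTI_SIZE)  /* 64 */

    /* get_order(): smallest n with (PAGE_SIZE << n) >= size */
    static unsigned int get_order_sketch(unsigned long size)
    {
            unsigned int order = 0;

            while ((PAGE_SIZE << order) < size)
                    order++;
            return order;
    }

    int main(void)
    {
            unsigned long possible_cpus = 288;  /* sample large machine */
            unsigned long ncpus = possible_cpus - HVC_BOOT_ARRAY_SIZE;

            /* 224 records * 64 bytes = 14336 bytes -> 4 pages -> order 2 */
            printf("order %u\n", get_order_sketch(ncpus * PVTI_SIZE));
            return 0;
    }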
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index afdb303285f8..8dc69d82567e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf, | |||
91 | 91 | ||
92 | if (len < 5) { | 92 | if (len < 5) { |
93 | #ifdef CONFIG_RETPOLINE | 93 | #ifdef CONFIG_RETPOLINE |
94 | WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); | 94 | WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); |
95 | #endif | 95 | #endif |
96 | return len; /* call too long for patch site */ | 96 | return len; /* call too long for patch site */ |
97 | } | 97 | } |
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | |||
111 | 111 | ||
112 | if (len < 5) { | 112 | if (len < 5) { |
113 | #ifdef CONFIG_RETPOLINE | 113 | #ifdef CONFIG_RETPOLINE |
114 | WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); | 114 | WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); |
115 | #endif | 115 | #endif |
116 | return len; /* call too long for patch site */ | 116 | return len; /* call too long for patch site */ |
117 | } | 117 | } |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a419f86..5dd3317d761f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -65,6 +65,23 @@ jiffies_64 = jiffies; | |||
65 | #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); | 65 | #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); |
66 | #define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); | 66 | #define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); |
67 | 67 | ||
68 | /* | ||
69 | * This section contains data which will be mapped as decrypted. Memory | ||
70 | * encryption operates on a page basis. Make this section PMD-aligned | ||
71 | * to avoid splitting the pages while mapping the section early. | ||
72 | * | ||
73 | * Note: We use a separate section so that only this section gets | ||
74 | * decrypted to avoid exposing more than we wish. | ||
75 | */ | ||
76 | #define BSS_DECRYPTED \ | ||
77 | . = ALIGN(PMD_SIZE); \ | ||
78 | __start_bss_decrypted = .; \ | ||
79 | *(.bss..decrypted); \ | ||
80 | . = ALIGN(PAGE_SIZE); \ | ||
81 | __start_bss_decrypted_unused = .; \ | ||
82 | . = ALIGN(PMD_SIZE); \ | ||
83 | __end_bss_decrypted = .; \ | ||
84 | |||
68 | #else | 85 | #else |
69 | 86 | ||
70 | #define X86_ALIGN_RODATA_BEGIN | 87 | #define X86_ALIGN_RODATA_BEGIN |
@@ -74,6 +91,7 @@ jiffies_64 = jiffies; | |||
74 | 91 | ||
75 | #define ALIGN_ENTRY_TEXT_BEGIN | 92 | #define ALIGN_ENTRY_TEXT_BEGIN |
76 | #define ALIGN_ENTRY_TEXT_END | 93 | #define ALIGN_ENTRY_TEXT_END |
94 | #define BSS_DECRYPTED | ||
77 | 95 | ||
78 | #endif | 96 | #endif |
79 | 97 | ||
@@ -355,6 +373,7 @@ SECTIONS | |||
355 | __bss_start = .; | 373 | __bss_start = .; |
356 | *(.bss..page_aligned) | 374 | *(.bss..page_aligned) |
357 | *(.bss) | 375 | *(.bss) |
376 | BSS_DECRYPTED | ||
358 | . = ALIGN(PAGE_SIZE); | 377 | . = ALIGN(PAGE_SIZE); |
359 | __bss_stop = .; | 378 | __bss_stop = .; |
360 | } | 379 | } |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7a8fc26c1115..faca978ebf9d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end) | |||
815 | set_memory_np_noalias(begin_ul, len_pages); | 815 | set_memory_np_noalias(begin_ul, len_pages); |
816 | } | 816 | } |
817 | 817 | ||
818 | void __weak mem_encrypt_free_decrypted_mem(void) { } | ||
819 | |||
818 | void __ref free_initmem(void) | 820 | void __ref free_initmem(void) |
819 | { | 821 | { |
820 | e820__reallocate_tables(); | 822 | e820__reallocate_tables(); |
821 | 823 | ||
824 | mem_encrypt_free_decrypted_mem(); | ||
825 | |||
822 | free_kernel_image_pages(&__init_begin, &__init_end); | 826 | free_kernel_image_pages(&__init_begin, &__init_end); |
823 | } | 827 | } |
824 | 828 | ||
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index b2de398d1fd3..006f373f54ab 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c | |||
@@ -348,6 +348,30 @@ bool sev_active(void) | |||
348 | EXPORT_SYMBOL(sev_active); | 348 | EXPORT_SYMBOL(sev_active); |
349 | 349 | ||
350 | /* Architecture __weak replacement functions */ | 350 | /* Architecture __weak replacement functions */ |
351 | void __init mem_encrypt_free_decrypted_mem(void) | ||
352 | { | ||
353 | unsigned long vaddr, vaddr_end, npages; | ||
354 | int r; | ||
355 | |||
356 | vaddr = (unsigned long)__start_bss_decrypted_unused; | ||
357 | vaddr_end = (unsigned long)__end_bss_decrypted; | ||
358 | npages = (vaddr_end - vaddr) >> PAGE_SHIFT; | ||
359 | |||
360 | /* | ||
361 | * The unused memory range was mapped decrypted; change the | ||
362 | * encryption attribute back to encrypted before freeing it. | ||
363 | */ | ||
364 | if (mem_encrypt_active()) { | ||
365 | r = set_memory_encrypted(vaddr, npages); | ||
366 | if (r) { | ||
367 | pr_warn("failed to free unused decrypted pages\n"); | ||
368 | return; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | free_init_pages("unused decrypted", vaddr, vaddr_end); | ||
373 | } | ||
374 | |||
351 | void __init mem_encrypt_init(void) | 375 | void __init mem_encrypt_init(void) |
352 | { | 376 | { |
353 | if (!sme_me_mask) | 377 | if (!sme_me_mask) |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ae394552fb94..089e78c4effd 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) | |||
637 | { | 637 | { |
638 | unsigned long address = __fix_to_virt(idx); | 638 | unsigned long address = __fix_to_virt(idx); |
639 | 639 | ||
640 | #ifdef CONFIG_X86_64 | ||
641 | /* | ||
642 | * Ensure that the static initial page tables are covering the | ||
643 | * fixmap completely. | ||
644 | */ | ||
645 | BUILD_BUG_ON(__end_of_permanent_fixed_addresses > | ||
646 | (FIXMAP_PMD_NUM * PTRS_PER_PTE)); | ||
647 | #endif | ||
648 | |||
640 | if (idx >= __end_of_fixed_addresses) { | 649 | if (idx >= __end_of_fixed_addresses) { |
641 | BUG(); | 650 | BUG(); |
642 | return; | 651 | return; |
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2fe5c9b1816b..dd461c0167ef 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c | |||
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1907 | /* L3_k[511] -> level2_fixmap_pgt */ | 1907 | /* L3_k[511] -> level2_fixmap_pgt */ |
1908 | convert_pfn_mfn(level3_kernel_pgt); | 1908 | convert_pfn_mfn(level3_kernel_pgt); |
1909 | 1909 | ||
1910 | /* L3_k[511][506] -> level1_fixmap_pgt */ | 1910 | /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */ |
1911 | convert_pfn_mfn(level2_fixmap_pgt); | 1911 | convert_pfn_mfn(level2_fixmap_pgt); |
1912 | 1912 | ||
1913 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ | 1913 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ |
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1952 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | 1952 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); |
1953 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1953 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); |
1954 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1954 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); |
1955 | set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); | 1955 | |
1956 | for (i = 0; i < FIXMAP_PMD_NUM; i++) { | ||
1957 | set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE, | ||
1958 | PAGE_KERNEL_RO); | ||
1959 | } | ||
1956 | 1960 | ||
1957 | /* Pin down new L4 */ | 1961 | /* Pin down new L4 */ |
1958 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | 1962 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, |
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 7d00d4ad44d4..95997e6c0696 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c | |||
@@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, | |||
478 | irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) | 478 | irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) |
479 | { | 479 | { |
480 | int err, ret = IRQ_NONE; | 480 | int err, ret = IRQ_NONE; |
481 | struct pt_regs regs; | 481 | struct pt_regs regs = {0}; |
482 | const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | 482 | const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); |
483 | uint8_t xenpmu_flags = get_xenpmu_flags(); | 483 | uint8_t xenpmu_flags = get_xenpmu_flags(); |
484 | 484 | ||
diff --git a/block/bio.c b/block/bio.c index 8c680a776171..0093bed81c0e 100644 --- a/block/bio.c +++ b/block/bio.c | |||
@@ -1684,7 +1684,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op, | |||
1684 | const int sgrp = op_stat_group(req_op); | 1684 | const int sgrp = op_stat_group(req_op); |
1685 | int cpu = part_stat_lock(); | 1685 | int cpu = part_stat_lock(); |
1686 | 1686 | ||
1687 | part_stat_add(cpu, part, ticks[sgrp], duration); | 1687 | part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration)); |
1688 | part_round_stats(q, cpu, part); | 1688 | part_round_stats(q, cpu, part); |
1689 | part_dec_in_flight(q, part, op_is_write(req_op)); | 1689 | part_dec_in_flight(q, part, op_is_write(req_op)); |
1690 | 1690 | ||
diff --git a/block/blk-core.c b/block/blk-core.c index 4dbc93f43b38..cff0a60ee200 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -2733,17 +2733,15 @@ void blk_account_io_done(struct request *req, u64 now) | |||
2733 | * containing request is enough. | 2733 | * containing request is enough. |
2734 | */ | 2734 | */ |
2735 | if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { | 2735 | if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { |
2736 | unsigned long duration; | ||
2737 | const int sgrp = op_stat_group(req_op(req)); | 2736 | const int sgrp = op_stat_group(req_op(req)); |
2738 | struct hd_struct *part; | 2737 | struct hd_struct *part; |
2739 | int cpu; | 2738 | int cpu; |
2740 | 2739 | ||
2741 | duration = nsecs_to_jiffies(now - req->start_time_ns); | ||
2742 | cpu = part_stat_lock(); | 2740 | cpu = part_stat_lock(); |
2743 | part = req->part; | 2741 | part = req->part; |
2744 | 2742 | ||
2745 | part_stat_inc(cpu, part, ios[sgrp]); | 2743 | part_stat_inc(cpu, part, ios[sgrp]); |
2746 | part_stat_add(cpu, part, ticks[sgrp], duration); | 2744 | part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns); |
2747 | part_round_stats(req->q, cpu, part); | 2745 | part_round_stats(req->q, cpu, part); |
2748 | part_dec_in_flight(req->q, part, rq_data_dir(req)); | 2746 | part_dec_in_flight(req->q, part, rq_data_dir(req)); |
2749 | 2747 | ||
diff --git a/block/genhd.c b/block/genhd.c index 8cc719a37b32..be5bab20b2ab 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -1343,18 +1343,18 @@ static int diskstats_show(struct seq_file *seqf, void *v) | |||
1343 | part_stat_read(hd, ios[STAT_READ]), | 1343 | part_stat_read(hd, ios[STAT_READ]), |
1344 | part_stat_read(hd, merges[STAT_READ]), | 1344 | part_stat_read(hd, merges[STAT_READ]), |
1345 | part_stat_read(hd, sectors[STAT_READ]), | 1345 | part_stat_read(hd, sectors[STAT_READ]), |
1346 | jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])), | 1346 | (unsigned int)part_stat_read_msecs(hd, STAT_READ), |
1347 | part_stat_read(hd, ios[STAT_WRITE]), | 1347 | part_stat_read(hd, ios[STAT_WRITE]), |
1348 | part_stat_read(hd, merges[STAT_WRITE]), | 1348 | part_stat_read(hd, merges[STAT_WRITE]), |
1349 | part_stat_read(hd, sectors[STAT_WRITE]), | 1349 | part_stat_read(hd, sectors[STAT_WRITE]), |
1350 | jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])), | 1350 | (unsigned int)part_stat_read_msecs(hd, STAT_WRITE), |
1351 | inflight[0], | 1351 | inflight[0], |
1352 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), | 1352 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), |
1353 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)), | 1353 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)), |
1354 | part_stat_read(hd, ios[STAT_DISCARD]), | 1354 | part_stat_read(hd, ios[STAT_DISCARD]), |
1355 | part_stat_read(hd, merges[STAT_DISCARD]), | 1355 | part_stat_read(hd, merges[STAT_DISCARD]), |
1356 | part_stat_read(hd, sectors[STAT_DISCARD]), | 1356 | part_stat_read(hd, sectors[STAT_DISCARD]), |
1357 | jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD])) | 1357 | (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD) |
1358 | ); | 1358 | ); |
1359 | } | 1359 | } |
1360 | disk_part_iter_exit(&piter); | 1360 | disk_part_iter_exit(&piter); |
diff --git a/block/partition-generic.c b/block/partition-generic.c index 5a8975a1201c..d3d14e81fb12 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c | |||
@@ -136,18 +136,18 @@ ssize_t part_stat_show(struct device *dev, | |||
136 | part_stat_read(p, ios[STAT_READ]), | 136 | part_stat_read(p, ios[STAT_READ]), |
137 | part_stat_read(p, merges[STAT_READ]), | 137 | part_stat_read(p, merges[STAT_READ]), |
138 | (unsigned long long)part_stat_read(p, sectors[STAT_READ]), | 138 | (unsigned long long)part_stat_read(p, sectors[STAT_READ]), |
139 | jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])), | 139 | (unsigned int)part_stat_read_msecs(p, STAT_READ), |
140 | part_stat_read(p, ios[STAT_WRITE]), | 140 | part_stat_read(p, ios[STAT_WRITE]), |
141 | part_stat_read(p, merges[STAT_WRITE]), | 141 | part_stat_read(p, merges[STAT_WRITE]), |
142 | (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]), | 142 | (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]), |
143 | jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])), | 143 | (unsigned int)part_stat_read_msecs(p, STAT_WRITE), |
144 | inflight[0], | 144 | inflight[0], |
145 | jiffies_to_msecs(part_stat_read(p, io_ticks)), | 145 | jiffies_to_msecs(part_stat_read(p, io_ticks)), |
146 | jiffies_to_msecs(part_stat_read(p, time_in_queue)), | 146 | jiffies_to_msecs(part_stat_read(p, time_in_queue)), |
147 | part_stat_read(p, ios[STAT_DISCARD]), | 147 | part_stat_read(p, ios[STAT_DISCARD]), |
148 | part_stat_read(p, merges[STAT_DISCARD]), | 148 | part_stat_read(p, merges[STAT_DISCARD]), |
149 | (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]), | 149 | (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]), |
150 | jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD]))); | 150 | (unsigned int)part_stat_read_msecs(p, STAT_DISCARD)); |
151 | } | 151 | } |
152 | 152 | ||
153 | ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, | 153 | ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, |
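Every jiffies_to_msecs(part_stat_read(..., ticks[...])) above becomes part_stat_read_msecs(), which presumably divides the new nanosecond counters down for the existing msec-denominated sysfs and procfs fields. A sketch of such a helper; the exact kernel definition may differ:

    /* Sketch: report a nanosecond-resolution I/O stat in milliseconds. */
    #define NSEC_PER_MSEC 1000000ULL
    #define part_stat_read_msecs(part, which) \
            div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)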
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index d8e159feb573..89110dfc7127 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig | |||
@@ -90,14 +90,17 @@ config EFI_ARMSTUB | |||
90 | config EFI_ARMSTUB_DTB_LOADER | 90 | config EFI_ARMSTUB_DTB_LOADER |
91 | bool "Enable the DTB loader" | 91 | bool "Enable the DTB loader" |
92 | depends on EFI_ARMSTUB | 92 | depends on EFI_ARMSTUB |
93 | default y | ||
93 | help | 94 | help |
94 | Select this config option to add support for the dtb= command | 95 | Select this config option to add support for the dtb= command |
95 | line parameter, allowing a device tree blob to be loaded into | 96 | line parameter, allowing a device tree blob to be loaded into |
96 | memory from the EFI System Partition by the stub. | 97 | memory from the EFI System Partition by the stub. |
97 | 98 | ||
98 | The device tree is typically provided by the platform or by | 99 | If the device tree is provided by the platform or by |
99 | the bootloader, so this option is mostly for development | 100 | the bootloader, this option may not be needed. |
100 | purposes only. | 101 | But for development purposes, and to keep existing |
102 | setups working with bootloaders that lack such support, | ||
103 | this option remains necessary. | ||
101 | 104 | ||
102 | config EFI_BOOTLOADER_CONTROL | 105 | config EFI_BOOTLOADER_CONTROL |
103 | tristate "EFI Bootloader Control" | 106 | tristate "EFI Bootloader Control" |
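For illustration only: with EFI_ARMSTUB_DTB_LOADER enabled, the stub picks up a blob named on the kernel command line, e.g. from the EFI shell (file name and console/root arguments hypothetical):

    Shell> Image dtb=\dtbs\myboard.dtb console=ttyAMA0 root=/dev/vda2

Keeping the option default-y preserves this path for bootloaders that cannot pass a device tree themselves.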
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index e11ab12fbdf2..800986a79704 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c | |||
@@ -528,8 +528,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev, | |||
528 | } | 528 | } |
529 | 529 | ||
530 | static const struct of_device_id usbhs_child_match_table[] = { | 530 | static const struct of_device_id usbhs_child_match_table[] = { |
531 | { .compatible = "ti,omap-ehci", }, | 531 | { .compatible = "ti,ehci-omap", }, |
532 | { .compatible = "ti,omap-ohci", }, | 532 | { .compatible = "ti,ohci-omap3", }, |
533 | { } | 533 | { } |
534 | }; | 534 | }; |
535 | 535 | ||
@@ -855,6 +855,7 @@ static struct platform_driver usbhs_omap_driver = { | |||
855 | .pm = &usbhsomap_dev_pm_ops, | 855 | .pm = &usbhsomap_dev_pm_ops, |
856 | .of_match_table = usbhs_omap_dt_ids, | 856 | .of_match_table = usbhs_omap_dt_ids, |
857 | }, | 857 | }, |
858 | .probe = usbhs_omap_probe, | ||
858 | .remove = usbhs_omap_remove, | 859 | .remove = usbhs_omap_remove, |
859 | }; | 860 | }; |
860 | 861 | ||
@@ -864,9 +865,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); | |||
864 | MODULE_LICENSE("GPL v2"); | 865 | MODULE_LICENSE("GPL v2"); |
865 | MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); | 866 | MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); |
866 | 867 | ||
867 | static int __init omap_usbhs_drvinit(void) | 868 | static int omap_usbhs_drvinit(void) |
868 | { | 869 | { |
869 | return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe); | 870 | return platform_driver_register(&usbhs_omap_driver); |
870 | } | 871 | } |
871 | 872 | ||
872 | /* | 873 | /* |
@@ -878,7 +879,7 @@ static int __init omap_usbhs_drvinit(void) | |||
878 | */ | 879 | */ |
879 | fs_initcall_sync(omap_usbhs_drvinit); | 880 | fs_initcall_sync(omap_usbhs_drvinit); |
880 | 881 | ||
881 | static void __exit omap_usbhs_drvexit(void) | 882 | static void omap_usbhs_drvexit(void) |
882 | { | 883 | { |
883 | platform_driver_unregister(&usbhs_omap_driver); | 884 | platform_driver_unregister(&usbhs_omap_driver); |
884 | } | 885 | } |
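The driver change above swaps platform_driver_probe() for platform_driver_register(): the former binds only devices already present at registration time and cannot honour -EPROBE_DEFER, which is presumably also why the __init/__exit annotations are dropped (binding is no longer confined to early init). A minimal sketch of the resulting pattern, with made-up names:

    #include <linux/module.h>
    #include <linux/platform_device.h>

    static int demo_probe(struct platform_device *pdev)
    {
            dev_info(&pdev->dev, "bound\n");
            return 0;   /* or -EPROBE_DEFER to be retried later */
    }

    static int demo_remove(struct platform_device *pdev)
    {
            return 0;
    }

    static struct platform_driver demo_driver = {
            .driver = { .name = "demo" },
            .probe  = demo_probe,   /* probe stays in the struct ... */
            .remove = demo_remove,
    };
    module_platform_driver(demo_driver);  /* ... so late binds still work */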
diff --git a/drivers/pinctrl/intel/pinctrl-cannonlake.c b/drivers/pinctrl/intel/pinctrl-cannonlake.c index fb1afe55bf53..8d48371caaa2 100644 --- a/drivers/pinctrl/intel/pinctrl-cannonlake.c +++ b/drivers/pinctrl/intel/pinctrl-cannonlake.c | |||
@@ -379,7 +379,7 @@ static const struct intel_padgroup cnlh_community1_gpps[] = { | |||
379 | static const struct intel_padgroup cnlh_community3_gpps[] = { | 379 | static const struct intel_padgroup cnlh_community3_gpps[] = { |
380 | CNL_GPP(0, 155, 178, 192), /* GPP_K */ | 380 | CNL_GPP(0, 155, 178, 192), /* GPP_K */ |
381 | CNL_GPP(1, 179, 202, 224), /* GPP_H */ | 381 | CNL_GPP(1, 179, 202, 224), /* GPP_H */ |
382 | CNL_GPP(2, 203, 215, 258), /* GPP_E */ | 382 | CNL_GPP(2, 203, 215, 256), /* GPP_E */ |
383 | CNL_GPP(3, 216, 239, 288), /* GPP_F */ | 383 | CNL_GPP(3, 216, 239, 288), /* GPP_F */ |
384 | CNL_GPP(4, 240, 248, CNL_NO_GPIO), /* SPI */ | 384 | CNL_GPP(4, 240, 248, CNL_NO_GPIO), /* SPI */ |
385 | }; | 385 | }; |
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 62b009b27eda..ec8dafc94694 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c | |||
@@ -747,13 +747,63 @@ static const struct pinctrl_desc intel_pinctrl_desc = { | |||
747 | .owner = THIS_MODULE, | 747 | .owner = THIS_MODULE, |
748 | }; | 748 | }; |
749 | 749 | ||
750 | /** | ||
751 | * intel_gpio_to_pin() - Translate from GPIO offset to pin number | ||
752 | * @pctrl: Pinctrl structure | ||
753 | * @offset: GPIO offset from gpiolib | ||
754 | * @community: Community is filled here if not %NULL | ||
755 | * @padgrp: Pad group is filled here if not %NULL | ||
756 | * | ||
757 | * When coming through gpiolib irqchip, the GPIO offset is not | ||
758 | * automatically translated to pinctrl pin number. This function can be | ||
759 | * used to find out the corresponding pinctrl pin. | ||
760 | */ | ||
761 | static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset, | ||
762 | const struct intel_community **community, | ||
763 | const struct intel_padgroup **padgrp) | ||
764 | { | ||
765 | int i; | ||
766 | |||
767 | for (i = 0; i < pctrl->ncommunities; i++) { | ||
768 | const struct intel_community *comm = &pctrl->communities[i]; | ||
769 | int j; | ||
770 | |||
771 | for (j = 0; j < comm->ngpps; j++) { | ||
772 | const struct intel_padgroup *pgrp = &comm->gpps[j]; | ||
773 | |||
774 | if (pgrp->gpio_base < 0) | ||
775 | continue; | ||
776 | |||
777 | if (offset >= pgrp->gpio_base && | ||
778 | offset < pgrp->gpio_base + pgrp->size) { | ||
779 | int pin; | ||
780 | |||
781 | pin = pgrp->base + offset - pgrp->gpio_base; | ||
782 | if (community) | ||
783 | *community = comm; | ||
784 | if (padgrp) | ||
785 | *padgrp = pgrp; | ||
786 | |||
787 | return pin; | ||
788 | } | ||
789 | } | ||
790 | } | ||
791 | |||
792 | return -EINVAL; | ||
793 | } | ||
794 | |||
750 | static int intel_gpio_get(struct gpio_chip *chip, unsigned offset) | 795 | static int intel_gpio_get(struct gpio_chip *chip, unsigned offset) |
751 | { | 796 | { |
752 | struct intel_pinctrl *pctrl = gpiochip_get_data(chip); | 797 | struct intel_pinctrl *pctrl = gpiochip_get_data(chip); |
753 | void __iomem *reg; | 798 | void __iomem *reg; |
754 | u32 padcfg0; | 799 | u32 padcfg0; |
800 | int pin; | ||
801 | |||
802 | pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL); | ||
803 | if (pin < 0) | ||
804 | return -EINVAL; | ||
755 | 805 | ||
756 | reg = intel_get_padcfg(pctrl, offset, PADCFG0); | 806 | reg = intel_get_padcfg(pctrl, pin, PADCFG0); |
757 | if (!reg) | 807 | if (!reg) |
758 | return -EINVAL; | 808 | return -EINVAL; |
759 | 809 | ||
@@ -770,8 +820,13 @@ static void intel_gpio_set(struct gpio_chip *chip, unsigned offset, int value) | |||
770 | unsigned long flags; | 820 | unsigned long flags; |
771 | void __iomem *reg; | 821 | void __iomem *reg; |
772 | u32 padcfg0; | 822 | u32 padcfg0; |
823 | int pin; | ||
824 | |||
825 | pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL); | ||
826 | if (pin < 0) | ||
827 | return; | ||
773 | 828 | ||
774 | reg = intel_get_padcfg(pctrl, offset, PADCFG0); | 829 | reg = intel_get_padcfg(pctrl, pin, PADCFG0); |
775 | if (!reg) | 830 | if (!reg) |
776 | return; | 831 | return; |
777 | 832 | ||
@@ -790,8 +845,13 @@ static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) | |||
790 | struct intel_pinctrl *pctrl = gpiochip_get_data(chip); | 845 | struct intel_pinctrl *pctrl = gpiochip_get_data(chip); |
791 | void __iomem *reg; | 846 | void __iomem *reg; |
792 | u32 padcfg0; | 847 | u32 padcfg0; |
848 | int pin; | ||
793 | 849 | ||
794 | reg = intel_get_padcfg(pctrl, offset, PADCFG0); | 850 | pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL); |
851 | if (pin < 0) | ||
852 | return -EINVAL; | ||
853 | |||
854 | reg = intel_get_padcfg(pctrl, pin, PADCFG0); | ||
795 | if (!reg) | 855 | if (!reg) |
796 | return -EINVAL; | 856 | return -EINVAL; |
797 | 857 | ||
@@ -827,51 +887,6 @@ static const struct gpio_chip intel_gpio_chip = { | |||
827 | .set_config = gpiochip_generic_config, | 887 | .set_config = gpiochip_generic_config, |
828 | }; | 888 | }; |
829 | 889 | ||
830 | /** | ||
831 | * intel_gpio_to_pin() - Translate from GPIO offset to pin number | ||
832 | * @pctrl: Pinctrl structure | ||
833 | * @offset: GPIO offset from gpiolib | ||
834 | * @commmunity: Community is filled here if not %NULL | ||
835 | * @padgrp: Pad group is filled here if not %NULL | ||
836 | * | ||
837 | * When coming through gpiolib irqchip, the GPIO offset is not | ||
838 | * automatically translated to pinctrl pin number. This function can be | ||
839 | * used to find out the corresponding pinctrl pin. | ||
840 | */ | ||
841 | static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset, | ||
842 | const struct intel_community **community, | ||
843 | const struct intel_padgroup **padgrp) | ||
844 | { | ||
845 | int i; | ||
846 | |||
847 | for (i = 0; i < pctrl->ncommunities; i++) { | ||
848 | const struct intel_community *comm = &pctrl->communities[i]; | ||
849 | int j; | ||
850 | |||
851 | for (j = 0; j < comm->ngpps; j++) { | ||
852 | const struct intel_padgroup *pgrp = &comm->gpps[j]; | ||
853 | |||
854 | if (pgrp->gpio_base < 0) | ||
855 | continue; | ||
856 | |||
857 | if (offset >= pgrp->gpio_base && | ||
858 | offset < pgrp->gpio_base + pgrp->size) { | ||
859 | int pin; | ||
860 | |||
861 | pin = pgrp->base + offset - pgrp->gpio_base; | ||
862 | if (community) | ||
863 | *community = comm; | ||
864 | if (padgrp) | ||
865 | *padgrp = pgrp; | ||
866 | |||
867 | return pin; | ||
868 | } | ||
869 | } | ||
870 | } | ||
871 | |||
872 | return -EINVAL; | ||
873 | } | ||
874 | |||
875 | static int intel_gpio_irq_reqres(struct irq_data *d) | 890 | static int intel_gpio_irq_reqres(struct irq_data *d) |
876 | { | 891 | { |
877 | struct gpio_chip *gc = irq_data_get_irq_chip_data(d); | 892 | struct gpio_chip *gc = irq_data_get_irq_chip_data(d); |
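The gpiolib callbacks above previously handed the raw GPIO offset straight to intel_get_padcfg(), which indexes by pinctrl pin number; on communities whose pad groups carry a custom gpio_base the two numberings diverge and the wrong pad was read or written. The translation the relocated helper performs is just, with illustrative numbers:

    /* say pgrp->base = 155 and pgrp->gpio_base = 146 */
    pin = pgrp->base + (offset - pgrp->gpio_base);  /* offset 150 -> pin 159 */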
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7bafa703a992..84575baceebc 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -1040,18 +1040,33 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, | |||
1040 | return ret; | 1040 | return ret; |
1041 | 1041 | ||
1042 | for (i = 0; i < count; i++) { | 1042 | for (i = 0; i < count; i++) { |
1043 | /* Retry eagain maps */ | 1043 | switch (map_ops[i].status) { |
1044 | if (map_ops[i].status == GNTST_eagain) | 1044 | case GNTST_okay: |
1045 | gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, | 1045 | { |
1046 | &map_ops[i].status, __func__); | ||
1047 | |||
1048 | if (map_ops[i].status == GNTST_okay) { | ||
1049 | struct xen_page_foreign *foreign; | 1046 | struct xen_page_foreign *foreign; |
1050 | 1047 | ||
1051 | SetPageForeign(pages[i]); | 1048 | SetPageForeign(pages[i]); |
1052 | foreign = xen_page_foreign(pages[i]); | 1049 | foreign = xen_page_foreign(pages[i]); |
1053 | foreign->domid = map_ops[i].dom; | 1050 | foreign->domid = map_ops[i].dom; |
1054 | foreign->gref = map_ops[i].ref; | 1051 | foreign->gref = map_ops[i].ref; |
1052 | break; | ||
1053 | } | ||
1054 | |||
1055 | case GNTST_no_device_space: | ||
1056 | pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n"); | ||
1057 | break; | ||
1058 | |||
1059 | case GNTST_eagain: | ||
1060 | /* Retry eagain maps */ | ||
1061 | gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, | ||
1062 | map_ops + i, | ||
1063 | &map_ops[i].status, __func__); | ||
1064 | /* Test status in next loop iteration. */ | ||
1065 | i--; | ||
1066 | break; | ||
1067 | |||
1068 | default: | ||
1069 | break; | ||
1055 | } | 1070 | } |
1056 | } | 1071 | } |
1057 | 1072 | ||
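The rewritten loop dispatches on each map status and, for GNTST_eagain, retries the grant op and steps i back so the refreshed status goes through the same switch on the next pass; gnttab_retry_eagain_gop() bounds its own retries, so the revisit terminates. The general shape of the idiom, names generic:

    for (i = 0; i < count; i++) {
            if (status[i] == AGAIN) {
                    retry(&status[i]);  /* must be bounded, or this never ends */
                    i--;                /* next iteration re-tests status[i] */
                    continue;
            }
            consume(i);
    }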
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 57864422a2c8..25c08c6c7f99 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -83,10 +83,10 @@ struct partition { | |||
83 | } __attribute__((packed)); | 83 | } __attribute__((packed)); |
84 | 84 | ||
85 | struct disk_stats { | 85 | struct disk_stats { |
86 | u64 nsecs[NR_STAT_GROUPS]; | ||
86 | unsigned long sectors[NR_STAT_GROUPS]; | 87 | unsigned long sectors[NR_STAT_GROUPS]; |
87 | unsigned long ios[NR_STAT_GROUPS]; | 88 | unsigned long ios[NR_STAT_GROUPS]; |
88 | unsigned long merges[NR_STAT_GROUPS]; | 89 | unsigned long merges[NR_STAT_GROUPS]; |
89 | unsigned long ticks[NR_STAT_GROUPS]; | ||
90 | unsigned long io_ticks; | 90 | unsigned long io_ticks; |
91 | unsigned long time_in_queue; | 91 | unsigned long time_in_queue; |
92 | }; | 92 | }; |
@@ -354,6 +354,9 @@ static inline void free_part_stats(struct hd_struct *part) | |||
354 | 354 | ||
355 | #endif /* CONFIG_SMP */ | 355 | #endif /* CONFIG_SMP */ |
356 | 356 | ||
357 | #define part_stat_read_msecs(part, which) \ | ||
358 | div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC) | ||
359 | |||
357 | #define part_stat_read_accum(part, field) \ | 360 | #define part_stat_read_accum(part, field) \ |
358 | (part_stat_read(part, field[STAT_READ]) + \ | 361 | (part_stat_read(part, field[STAT_READ]) + \ |
359 | part_stat_read(part, field[STAT_WRITE]) + \ | 362 | part_stat_read(part, field[STAT_WRITE]) + \ |
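Disk-stat ticks now accumulate as u64 nanoseconds rather than unsigned long jiffies, and readers convert on the way out; div_u64() is used because a plain 64-bit division would not link on 32-bit kernels. What part_stat_read_msecs() boils down to:

    /* sketch of the macro's expansion for the read group */
    u64 ns = part_stat_read(part, nsecs[STAT_READ]);
    unsigned int ms = div_u64(ns, NSEC_PER_MSEC);  /* 1500000000 ns -> 1500 ms */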
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 8bdbb5f29494..74b0aa9c7499 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h | |||
@@ -357,6 +357,8 @@ | |||
357 | #define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) | 357 | #define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) |
358 | #define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) | 358 | #define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) |
359 | 359 | ||
360 | #define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) | ||
361 | |||
360 | #define GITS_BASER_NR_REGS 8 | 362 | #define GITS_BASER_NR_REGS 8 |
361 | 363 | ||
362 | #define GITS_BASER_VALID (1ULL << 63) | 364 | #define GITS_BASER_VALID (1ULL << 63) |
@@ -388,6 +390,9 @@ | |||
388 | #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) | 390 | #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) |
389 | #define GITS_BASER_PHYS_52_to_48(phys) \ | 391 | #define GITS_BASER_PHYS_52_to_48(phys) \ |
390 | (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) | 392 | (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) |
393 | #define GITS_BASER_ADDR_48_to_52(baser) \ | ||
394 | (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) | ||
395 | |||
391 | #define GITS_BASER_SHAREABILITY_SHIFT (10) | 396 | #define GITS_BASER_SHAREABILITY_SHIFT (10) |
392 | #define GITS_BASER_InnerShareable \ | 397 | #define GITS_BASER_InnerShareable \ |
393 | GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | 398 | GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) |
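In the GITS_BASER encoding, bits [47:16] of a 64K-aligned table address live in place and bits [51:48] are folded into register bits [15:12]; the two helpers pack and unpack that layout and round-trip cleanly for aligned addresses. A worked example:

    u64 phys  = 0x000a123456780000ULL;           /* addr bits [51:48] = 0xa */
    u64 baser = GITS_BASER_PHYS_52_to_48(phys);  /* 0x000012345678a000      */
    u64 back  = GITS_BASER_ADDR_48_to_52(baser); /* == phys again           */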
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h index 8a125701ef7b..50bed4f89c1a 100644 --- a/include/linux/mfd/da9063/pdata.h +++ b/include/linux/mfd/da9063/pdata.h | |||
@@ -21,7 +21,7 @@ | |||
21 | /* | 21 | /* |
22 | * Regulator configuration | 22 | * Regulator configuration |
23 | */ | 23 | */ |
24 | /* DA9063 regulator IDs */ | 24 | /* DA9063 and DA9063L regulator IDs */ |
25 | enum { | 25 | enum { |
26 | /* BUCKs */ | 26 | /* BUCKs */ |
27 | DA9063_ID_BCORE1, | 27 | DA9063_ID_BCORE1, |
@@ -37,18 +37,20 @@ enum { | |||
37 | DA9063_ID_BMEM_BIO_MERGED, | 37 | DA9063_ID_BMEM_BIO_MERGED, |
38 | /* When two BUCKs are merged, they cannot be reused separately */ | 38 | /* When two BUCKs are merged, they cannot be reused separately */ |
39 | 39 | ||
40 | /* LDOs */ | 40 | /* LDOs on both DA9063 and DA9063L */ |
41 | DA9063_ID_LDO3, | ||
42 | DA9063_ID_LDO7, | ||
43 | DA9063_ID_LDO8, | ||
44 | DA9063_ID_LDO9, | ||
45 | DA9063_ID_LDO11, | ||
46 | |||
47 | /* DA9063-only LDOs */ | ||
41 | DA9063_ID_LDO1, | 48 | DA9063_ID_LDO1, |
42 | DA9063_ID_LDO2, | 49 | DA9063_ID_LDO2, |
43 | DA9063_ID_LDO3, | ||
44 | DA9063_ID_LDO4, | 50 | DA9063_ID_LDO4, |
45 | DA9063_ID_LDO5, | 51 | DA9063_ID_LDO5, |
46 | DA9063_ID_LDO6, | 52 | DA9063_ID_LDO6, |
47 | DA9063_ID_LDO7, | ||
48 | DA9063_ID_LDO8, | ||
49 | DA9063_ID_LDO9, | ||
50 | DA9063_ID_LDO10, | 53 | DA9063_ID_LDO10, |
51 | DA9063_ID_LDO11, | ||
52 | }; | 54 | }; |
53 | 55 | ||
54 | /* Regulators platform data */ | 56 | /* Regulators platform data */ |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cb6d44e1fe02..2b7a652c9fa4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -758,6 +758,15 @@ struct kvm_ppc_resize_hpt { | |||
758 | #define KVM_S390_SIE_PAGE_OFFSET 1 | 758 | #define KVM_S390_SIE_PAGE_OFFSET 1 |
759 | 759 | ||
760 | /* | 760 | /* |
761 | * On arm64, machine type can be used to request the physical | ||
762 | * address size for the VM. Bits[7-0] are reserved for the guest | ||
763 | * PA size shift (i.e, log2(PA_Size)). For backward compatibility, | ||
764 | * value 0 implies the default IPA size, 40bits. | ||
765 | */ | ||
766 | #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL | ||
767 | #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ | ||
768 | ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) | ||
769 | /* | ||
761 | * ioctls for /dev/kvm fds: | 770 | * ioctls for /dev/kvm fds: |
762 | */ | 771 | */ |
763 | #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) | 772 | #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) |
@@ -965,6 +974,7 @@ struct kvm_ppc_resize_hpt { | |||
965 | #define KVM_CAP_COALESCED_PIO 162 | 974 | #define KVM_CAP_COALESCED_PIO 162 |
966 | #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 | 975 | #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 |
967 | #define KVM_CAP_EXCEPTION_PAYLOAD 164 | 976 | #define KVM_CAP_EXCEPTION_PAYLOAD 164 |
977 | #define KVM_CAP_ARM_VM_IPA_SIZE 165 | ||
968 | 978 | ||
969 | #ifdef KVM_CAP_IRQ_ROUTING | 979 | #ifdef KVM_CAP_IRQ_ROUTING |
970 | 980 | ||
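A userspace sketch of the resulting handshake, inside main() with error handling elided: probe the host limit via KVM_CHECK_EXTENSION, then encode the width into the machine type for KVM_CREATE_VM (a zero capability value means the extension is absent, so fall back to the default type):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int dev_fd  = open("/dev/kvm", O_RDWR);
    int ipa_max = ioctl(dev_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
    unsigned long type = ipa_max > 0 ? KVM_VM_TYPE_ARM_IPA_SIZE(ipa_max) : 0;
    int vm_fd   = ioctl(dev_fd, KVM_CREATE_VM, type);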
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 13a861135127..6eb9bacd1948 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build | |||
@@ -1 +1 @@ | |||
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o | libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o | ||
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2abd0f112627..bdb94939fd60 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "libbpf.h" | 50 | #include "libbpf.h" |
51 | #include "bpf.h" | 51 | #include "bpf.h" |
52 | #include "btf.h" | 52 | #include "btf.h" |
53 | #include "str_error.h" | ||
53 | 54 | ||
54 | #ifndef EM_BPF | 55 | #ifndef EM_BPF |
55 | #define EM_BPF 247 | 56 | #define EM_BPF 247 |
@@ -469,7 +470,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) | |||
469 | obj->efile.fd = open(obj->path, O_RDONLY); | 470 | obj->efile.fd = open(obj->path, O_RDONLY); |
470 | if (obj->efile.fd < 0) { | 471 | if (obj->efile.fd < 0) { |
471 | char errmsg[STRERR_BUFSIZE]; | 472 | char errmsg[STRERR_BUFSIZE]; |
472 | char *cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 473 | char *cp = str_error(errno, errmsg, sizeof(errmsg)); |
473 | 474 | ||
474 | pr_warning("failed to open %s: %s\n", obj->path, cp); | 475 | pr_warning("failed to open %s: %s\n", obj->path, cp); |
475 | return -errno; | 476 | return -errno; |
@@ -810,8 +811,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) | |||
810 | data->d_size, name, idx); | 811 | data->d_size, name, idx); |
811 | if (err) { | 812 | if (err) { |
812 | char errmsg[STRERR_BUFSIZE]; | 813 | char errmsg[STRERR_BUFSIZE]; |
813 | char *cp = strerror_r(-err, errmsg, | 814 | char *cp = str_error(-err, errmsg, sizeof(errmsg)); |
814 | sizeof(errmsg)); | ||
815 | 815 | ||
816 | pr_warning("failed to alloc program %s (%s): %s", | 816 | pr_warning("failed to alloc program %s (%s): %s", |
817 | name, obj->path, cp); | 817 | name, obj->path, cp); |
@@ -1140,7 +1140,7 @@ bpf_object__create_maps(struct bpf_object *obj) | |||
1140 | 1140 | ||
1141 | *pfd = bpf_create_map_xattr(&create_attr); | 1141 | *pfd = bpf_create_map_xattr(&create_attr); |
1142 | if (*pfd < 0 && create_attr.btf_key_type_id) { | 1142 | if (*pfd < 0 && create_attr.btf_key_type_id) { |
1143 | cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 1143 | cp = str_error(errno, errmsg, sizeof(errmsg)); |
1144 | pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", | 1144 | pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", |
1145 | map->name, cp, errno); | 1145 | map->name, cp, errno); |
1146 | create_attr.btf_fd = 0; | 1146 | create_attr.btf_fd = 0; |
@@ -1155,7 +1155,7 @@ bpf_object__create_maps(struct bpf_object *obj) | |||
1155 | size_t j; | 1155 | size_t j; |
1156 | 1156 | ||
1157 | err = *pfd; | 1157 | err = *pfd; |
1158 | cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 1158 | cp = str_error(errno, errmsg, sizeof(errmsg)); |
1159 | pr_warning("failed to create map (name: '%s'): %s\n", | 1159 | pr_warning("failed to create map (name: '%s'): %s\n", |
1160 | map->name, cp); | 1160 | map->name, cp); |
1161 | for (j = 0; j < i; j++) | 1161 | for (j = 0; j < i; j++) |
@@ -1339,7 +1339,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, | |||
1339 | } | 1339 | } |
1340 | 1340 | ||
1341 | ret = -LIBBPF_ERRNO__LOAD; | 1341 | ret = -LIBBPF_ERRNO__LOAD; |
1342 | cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 1342 | cp = str_error(errno, errmsg, sizeof(errmsg)); |
1343 | pr_warning("load bpf program failed: %s\n", cp); | 1343 | pr_warning("load bpf program failed: %s\n", cp); |
1344 | 1344 | ||
1345 | if (log_buf && log_buf[0] != '\0') { | 1345 | if (log_buf && log_buf[0] != '\0') { |
@@ -1654,7 +1654,7 @@ static int check_path(const char *path) | |||
1654 | 1654 | ||
1655 | dir = dirname(dname); | 1655 | dir = dirname(dname); |
1656 | if (statfs(dir, &st_fs)) { | 1656 | if (statfs(dir, &st_fs)) { |
1657 | cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 1657 | cp = str_error(errno, errmsg, sizeof(errmsg)); |
1658 | pr_warning("failed to statfs %s: %s\n", dir, cp); | 1658 | pr_warning("failed to statfs %s: %s\n", dir, cp); |
1659 | err = -errno; | 1659 | err = -errno; |
1660 | } | 1660 | } |
@@ -1690,7 +1690,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, | |||
1690 | } | 1690 | } |
1691 | 1691 | ||
1692 | if (bpf_obj_pin(prog->instances.fds[instance], path)) { | 1692 | if (bpf_obj_pin(prog->instances.fds[instance], path)) { |
1693 | cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 1693 | cp = str_error(errno, errmsg, sizeof(errmsg)); |
1694 | pr_warning("failed to pin program: %s\n", cp); | 1694 | pr_warning("failed to pin program: %s\n", cp); |
1695 | return -errno; | 1695 | return -errno; |
1696 | } | 1696 | } |
@@ -1708,7 +1708,7 @@ static int make_dir(const char *path) | |||
1708 | err = -errno; | 1708 | err = -errno; |
1709 | 1709 | ||
1710 | if (err) { | 1710 | if (err) { |
1711 | cp = strerror_r(-err, errmsg, sizeof(errmsg)); | 1711 | cp = str_error(-err, errmsg, sizeof(errmsg)); |
1712 | pr_warning("failed to mkdir %s: %s\n", path, cp); | 1712 | pr_warning("failed to mkdir %s: %s\n", path, cp); |
1713 | } | 1713 | } |
1714 | return err; | 1714 | return err; |
@@ -1770,7 +1770,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) | |||
1770 | } | 1770 | } |
1771 | 1771 | ||
1772 | if (bpf_obj_pin(map->fd, path)) { | 1772 | if (bpf_obj_pin(map->fd, path)) { |
1773 | cp = strerror_r(errno, errmsg, sizeof(errmsg)); | 1773 | cp = str_error(errno, errmsg, sizeof(errmsg)); |
1774 | pr_warning("failed to pin map: %s\n", cp); | 1774 | pr_warning("failed to pin map: %s\n", cp); |
1775 | return -errno; | 1775 | return -errno; |
1776 | } | 1776 | } |
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c new file mode 100644 index 000000000000..b8798114a357 --- /dev/null +++ b/tools/lib/bpf/str_error.c | |||
@@ -0,0 +1,18 @@ | |||
1 | // SPDX-License-Identifier: LGPL-2.1 | ||
2 | #undef _GNU_SOURCE | ||
3 | #include <string.h> | ||
4 | #include <stdio.h> | ||
5 | #include "str_error.h" | ||
6 | |||
7 | /* | ||
8 | * Wrapper to allow for building on non-GNU systems such as Alpine Linux's musl | ||
9 | * libc, while checking strerror_r() return to avoid having to check this in | ||
10 | * all places calling it. | ||
11 | */ | ||
12 | char *str_error(int err, char *dst, int len) | ||
13 | { | ||
14 | int ret = strerror_r(err, dst, len); | ||
15 | if (ret) | ||
16 | snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); | ||
17 | return dst; | ||
18 | } | ||
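The #undef _GNU_SOURCE at the top is the load-bearing line: it selects the XSI strerror_r(), which returns an int and writes into dst, rather than glibc's GNU variant, which returns a char * that may point at a static string and ignore dst; checking the int is what makes the wrapper portable to musl. Call sites then read, for example:

    char buf[128];
    if (fd < 0)
            fprintf(stderr, "open: %s\n", str_error(errno, buf, sizeof(buf)));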
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h new file mode 100644 index 000000000000..355b1db571d1 --- /dev/null +++ b/tools/lib/bpf/str_error.h | |||
@@ -0,0 +1,6 @@ | |||
1 | // SPDX-License-Identifier: LGPL-2.1 | ||
2 | #ifndef BPF_STR_ERROR | ||
3 | #define BPF_STR_ERROR | ||
4 | |||
5 | char *str_error(int err, char *dst, int len); | ||
6 | #endif // BPF_STR_ERROR | ||
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 42261a9b280e..ac841bc5c35b 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile | |||
@@ -280,7 +280,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt | |||
280 | mv $@+ $@ | 280 | mv $@+ $@ |
281 | 281 | ||
282 | ifdef USE_ASCIIDOCTOR | 282 | ifdef USE_ASCIIDOCTOR |
283 | $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt | 283 | $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt |
284 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 284 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
285 | $(ASCIIDOC) -b manpage -d manpage \ | 285 | $(ASCIIDOC) -b manpage -d manpage \ |
286 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 286 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..11b98b2b0486 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
@@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
120 | { | 120 | { |
121 | int ret, cpu; | 121 | int ret, cpu; |
122 | 122 | ||
123 | if (type) | 123 | ret = kvm_arm_setup_stage2(kvm, type); |
124 | return -EINVAL; | 124 | if (ret) |
125 | return ret; | ||
125 | 126 | ||
126 | kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); | 127 | kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); |
127 | if (!kvm->arch.last_vcpu_ran) | 128 | if (!kvm->arch.last_vcpu_ran) |
@@ -212,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
212 | case KVM_CAP_READONLY_MEM: | 213 | case KVM_CAP_READONLY_MEM: |
213 | case KVM_CAP_MP_STATE: | 214 | case KVM_CAP_MP_STATE: |
214 | case KVM_CAP_IMMEDIATE_EXIT: | 215 | case KVM_CAP_IMMEDIATE_EXIT: |
216 | case KVM_CAP_VCPU_EVENTS: | ||
215 | r = 1; | 217 | r = 1; |
216 | break; | 218 | break; |
217 | case KVM_CAP_ARM_SET_DEVICE_ADDR: | 219 | case KVM_CAP_ARM_SET_DEVICE_ADDR: |
@@ -240,7 +242,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
240 | r = 1; | 242 | r = 1; |
241 | break; | 243 | break; |
242 | default: | 244 | default: |
243 | r = kvm_arch_dev_ioctl_check_extension(kvm, ext); | 245 | r = kvm_arch_vm_ioctl_check_extension(kvm, ext); |
244 | break; | 246 | break; |
245 | } | 247 | } |
246 | return r; | 248 | return r; |
@@ -544,7 +546,7 @@ static void update_vttbr(struct kvm *kvm) | |||
544 | 546 | ||
545 | /* update vttbr to be used with the new vmid */ | 547 | /* update vttbr to be used with the new vmid */ |
546 | pgd_phys = virt_to_phys(kvm->arch.pgd); | 548 | pgd_phys = virt_to_phys(kvm->arch.pgd); |
547 | BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); | 549 | BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); |
548 | vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); | 550 | vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); |
549 | kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; | 551 | kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; |
550 | 552 | ||
@@ -1295,8 +1297,6 @@ static void cpu_init_hyp_mode(void *dummy) | |||
1295 | 1297 | ||
1296 | __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); | 1298 | __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); |
1297 | __cpu_init_stage2(); | 1299 | __cpu_init_stage2(); |
1298 | |||
1299 | kvm_arm_init_debug(); | ||
1300 | } | 1300 | } |
1301 | 1301 | ||
1302 | static void cpu_hyp_reset(void) | 1302 | static void cpu_hyp_reset(void) |
@@ -1309,16 +1309,12 @@ static void cpu_hyp_reinit(void) | |||
1309 | { | 1309 | { |
1310 | cpu_hyp_reset(); | 1310 | cpu_hyp_reset(); |
1311 | 1311 | ||
1312 | if (is_kernel_in_hyp_mode()) { | 1312 | if (is_kernel_in_hyp_mode()) |
1313 | /* | ||
1314 | * __cpu_init_stage2() is safe to call even if the PM | ||
1315 | * event was cancelled before the CPU was reset. | ||
1316 | */ | ||
1317 | __cpu_init_stage2(); | ||
1318 | kvm_timer_init_vhe(); | 1313 | kvm_timer_init_vhe(); |
1319 | } else { | 1314 | else |
1320 | cpu_init_hyp_mode(NULL); | 1315 | cpu_init_hyp_mode(NULL); |
1321 | } | 1316 | |
1317 | kvm_arm_init_debug(); | ||
1322 | 1318 | ||
1323 | if (vgic_present) | 1319 | if (vgic_present) |
1324 | kvm_vgic_init_cpu_hardware(); | 1320 | kvm_vgic_init_cpu_hardware(); |
@@ -1412,6 +1408,8 @@ static int init_common_resources(void) | |||
1412 | kvm_vmid_bits = kvm_get_vmid_bits(); | 1408 | kvm_vmid_bits = kvm_get_vmid_bits(); |
1413 | kvm_info("%d-bit VMID\n", kvm_vmid_bits); | 1409 | kvm_info("%d-bit VMID\n", kvm_vmid_bits); |
1414 | 1410 | ||
1411 | kvm_set_ipa_limit(); | ||
1412 | |||
1415 | return 0; | 1413 | return 0; |
1416 | } | 1414 | } |
1417 | 1415 | ||
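kvm_arch_init_vm() used to reject any nonzero type outright; kvm_arm_setup_stage2() now decodes and validates it instead. A hedged sketch of the shape of that check, not the exact upstream body, with host_ipa_limit standing in for whatever kvm_set_ipa_limit() computed:

    int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
    {
            u32 ipa_bits = type & KVM_VM_TYPE_ARM_IPA_SIZE_MASK;

            if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
                    return -EINVAL;         /* unknown type bits */
            if (!ipa_bits)
                    ipa_bits = 40;          /* backward-compatible default */
            if (ipa_bits < 32 || ipa_bits > host_ipa_limit)
                    return -EINVAL;
            /* ... derive kvm->arch.vtcr (T0SZ etc.) from ipa_bits ... */
            return 0;
    }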
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ed162a6c57c5..c23a1b323aad 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector; | |||
45 | 45 | ||
46 | static unsigned long io_map_base; | 46 | static unsigned long io_map_base; |
47 | 47 | ||
48 | #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) | ||
49 | #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) | 48 | #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) |
50 | 49 | ||
51 | #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) | 50 | #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) |
@@ -150,20 +149,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) | |||
150 | 149 | ||
151 | static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) | 150 | static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) |
152 | { | 151 | { |
153 | pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); | 152 | pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL); |
154 | stage2_pgd_clear(pgd); | 153 | stage2_pgd_clear(kvm, pgd); |
155 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 154 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
156 | stage2_pud_free(pud_table); | 155 | stage2_pud_free(kvm, pud_table); |
157 | put_page(virt_to_page(pgd)); | 156 | put_page(virt_to_page(pgd)); |
158 | } | 157 | } |
159 | 158 | ||
160 | static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) | 159 | static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) |
161 | { | 160 | { |
162 | pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); | 161 | pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); |
163 | VM_BUG_ON(stage2_pud_huge(*pud)); | 162 | VM_BUG_ON(stage2_pud_huge(kvm, *pud)); |
164 | stage2_pud_clear(pud); | 163 | stage2_pud_clear(kvm, pud); |
165 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 164 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
166 | stage2_pmd_free(pmd_table); | 165 | stage2_pmd_free(kvm, pmd_table); |
167 | put_page(virt_to_page(pud)); | 166 | put_page(virt_to_page(pud)); |
168 | } | 167 | } |
169 | 168 | ||
@@ -252,7 +251,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, | |||
252 | } | 251 | } |
253 | } while (pte++, addr += PAGE_SIZE, addr != end); | 252 | } while (pte++, addr += PAGE_SIZE, addr != end); |
254 | 253 | ||
255 | if (stage2_pte_table_empty(start_pte)) | 254 | if (stage2_pte_table_empty(kvm, start_pte)) |
256 | clear_stage2_pmd_entry(kvm, pmd, start_addr); | 255 | clear_stage2_pmd_entry(kvm, pmd, start_addr); |
257 | } | 256 | } |
258 | 257 | ||
@@ -262,9 +261,9 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, | |||
262 | phys_addr_t next, start_addr = addr; | 261 | phys_addr_t next, start_addr = addr; |
263 | pmd_t *pmd, *start_pmd; | 262 | pmd_t *pmd, *start_pmd; |
264 | 263 | ||
265 | start_pmd = pmd = stage2_pmd_offset(pud, addr); | 264 | start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr); |
266 | do { | 265 | do { |
267 | next = stage2_pmd_addr_end(addr, end); | 266 | next = stage2_pmd_addr_end(kvm, addr, end); |
268 | if (!pmd_none(*pmd)) { | 267 | if (!pmd_none(*pmd)) { |
269 | if (pmd_thp_or_huge(*pmd)) { | 268 | if (pmd_thp_or_huge(*pmd)) { |
270 | pmd_t old_pmd = *pmd; | 269 | pmd_t old_pmd = *pmd; |
@@ -281,7 +280,7 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, | |||
281 | } | 280 | } |
282 | } while (pmd++, addr = next, addr != end); | 281 | } while (pmd++, addr = next, addr != end); |
283 | 282 | ||
284 | if (stage2_pmd_table_empty(start_pmd)) | 283 | if (stage2_pmd_table_empty(kvm, start_pmd)) |
285 | clear_stage2_pud_entry(kvm, pud, start_addr); | 284 | clear_stage2_pud_entry(kvm, pud, start_addr); |
286 | } | 285 | } |
287 | 286 | ||
@@ -291,14 +290,14 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, | |||
291 | phys_addr_t next, start_addr = addr; | 290 | phys_addr_t next, start_addr = addr; |
292 | pud_t *pud, *start_pud; | 291 | pud_t *pud, *start_pud; |
293 | 292 | ||
294 | start_pud = pud = stage2_pud_offset(pgd, addr); | 293 | start_pud = pud = stage2_pud_offset(kvm, pgd, addr); |
295 | do { | 294 | do { |
296 | next = stage2_pud_addr_end(addr, end); | 295 | next = stage2_pud_addr_end(kvm, addr, end); |
297 | if (!stage2_pud_none(*pud)) { | 296 | if (!stage2_pud_none(kvm, *pud)) { |
298 | if (stage2_pud_huge(*pud)) { | 297 | if (stage2_pud_huge(kvm, *pud)) { |
299 | pud_t old_pud = *pud; | 298 | pud_t old_pud = *pud; |
300 | 299 | ||
301 | stage2_pud_clear(pud); | 300 | stage2_pud_clear(kvm, pud); |
302 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 301 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
303 | kvm_flush_dcache_pud(old_pud); | 302 | kvm_flush_dcache_pud(old_pud); |
304 | put_page(virt_to_page(pud)); | 303 | put_page(virt_to_page(pud)); |
@@ -308,7 +307,7 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, | |||
308 | } | 307 | } |
309 | } while (pud++, addr = next, addr != end); | 308 | } while (pud++, addr = next, addr != end); |
310 | 309 | ||
311 | if (stage2_pud_table_empty(start_pud)) | 310 | if (stage2_pud_table_empty(kvm, start_pud)) |
312 | clear_stage2_pgd_entry(kvm, pgd, start_addr); | 311 | clear_stage2_pgd_entry(kvm, pgd, start_addr); |
313 | } | 312 | } |
314 | 313 | ||
@@ -332,7 +331,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) | |||
332 | assert_spin_locked(&kvm->mmu_lock); | 331 | assert_spin_locked(&kvm->mmu_lock); |
333 | WARN_ON(size & ~PAGE_MASK); | 332 | WARN_ON(size & ~PAGE_MASK); |
334 | 333 | ||
335 | pgd = kvm->arch.pgd + stage2_pgd_index(addr); | 334 | pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); |
336 | do { | 335 | do { |
337 | /* | 336 | /* |
338 | * Make sure the page table is still active, as another thread | 337 | * Make sure the page table is still active, as another thread |
@@ -341,8 +340,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) | |||
341 | */ | 340 | */ |
342 | if (!READ_ONCE(kvm->arch.pgd)) | 341 | if (!READ_ONCE(kvm->arch.pgd)) |
343 | break; | 342 | break; |
344 | next = stage2_pgd_addr_end(addr, end); | 343 | next = stage2_pgd_addr_end(kvm, addr, end); |
345 | if (!stage2_pgd_none(*pgd)) | 344 | if (!stage2_pgd_none(kvm, *pgd)) |
346 | unmap_stage2_puds(kvm, pgd, addr, next); | 345 | unmap_stage2_puds(kvm, pgd, addr, next); |
347 | /* | 346 | /* |
348 | * If the range is too large, release the kvm->mmu_lock | 347 | * If the range is too large, release the kvm->mmu_lock |
@@ -371,9 +370,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, | |||
371 | pmd_t *pmd; | 370 | pmd_t *pmd; |
372 | phys_addr_t next; | 371 | phys_addr_t next; |
373 | 372 | ||
374 | pmd = stage2_pmd_offset(pud, addr); | 373 | pmd = stage2_pmd_offset(kvm, pud, addr); |
375 | do { | 374 | do { |
376 | next = stage2_pmd_addr_end(addr, end); | 375 | next = stage2_pmd_addr_end(kvm, addr, end); |
377 | if (!pmd_none(*pmd)) { | 376 | if (!pmd_none(*pmd)) { |
378 | if (pmd_thp_or_huge(*pmd)) | 377 | if (pmd_thp_or_huge(*pmd)) |
379 | kvm_flush_dcache_pmd(*pmd); | 378 | kvm_flush_dcache_pmd(*pmd); |
@@ -389,11 +388,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, | |||
389 | pud_t *pud; | 388 | pud_t *pud; |
390 | phys_addr_t next; | 389 | phys_addr_t next; |
391 | 390 | ||
392 | pud = stage2_pud_offset(pgd, addr); | 391 | pud = stage2_pud_offset(kvm, pgd, addr); |
393 | do { | 392 | do { |
394 | next = stage2_pud_addr_end(addr, end); | 393 | next = stage2_pud_addr_end(kvm, addr, end); |
395 | if (!stage2_pud_none(*pud)) { | 394 | if (!stage2_pud_none(kvm, *pud)) { |
396 | if (stage2_pud_huge(*pud)) | 395 | if (stage2_pud_huge(kvm, *pud)) |
397 | kvm_flush_dcache_pud(*pud); | 396 | kvm_flush_dcache_pud(*pud); |
398 | else | 397 | else |
399 | stage2_flush_pmds(kvm, pud, addr, next); | 398 | stage2_flush_pmds(kvm, pud, addr, next); |
@@ -409,10 +408,11 @@ static void stage2_flush_memslot(struct kvm *kvm, | |||
409 | phys_addr_t next; | 408 | phys_addr_t next; |
410 | pgd_t *pgd; | 409 | pgd_t *pgd; |
411 | 410 | ||
412 | pgd = kvm->arch.pgd + stage2_pgd_index(addr); | 411 | pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); |
413 | do { | 412 | do { |
414 | next = stage2_pgd_addr_end(addr, end); | 413 | next = stage2_pgd_addr_end(kvm, addr, end); |
415 | stage2_flush_puds(kvm, pgd, addr, next); | 414 | if (!stage2_pgd_none(kvm, *pgd)) |
415 | stage2_flush_puds(kvm, pgd, addr, next); | ||
416 | } while (pgd++, addr = next, addr != end); | 416 | } while (pgd++, addr = next, addr != end); |
417 | } | 417 | } |
418 | 418 | ||
@@ -897,7 +897,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
897 | } | 897 | } |
898 | 898 | ||
899 | /* Allocate the HW PGD, making sure that each page gets its own refcount */ | 899 | /* Allocate the HW PGD, making sure that each page gets its own refcount */ |
900 | pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); | 900 | pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); |
901 | if (!pgd) | 901 | if (!pgd) |
902 | return -ENOMEM; | 902 | return -ENOMEM; |
903 | 903 | ||
@@ -986,7 +986,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) | |||
986 | 986 | ||
987 | spin_lock(&kvm->mmu_lock); | 987 | spin_lock(&kvm->mmu_lock); |
988 | if (kvm->arch.pgd) { | 988 | if (kvm->arch.pgd) { |
989 | unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); | 989 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); |
990 | pgd = READ_ONCE(kvm->arch.pgd); | 990 | pgd = READ_ONCE(kvm->arch.pgd); |
991 | kvm->arch.pgd = NULL; | 991 | kvm->arch.pgd = NULL; |
992 | } | 992 | } |
@@ -994,7 +994,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) | |||
994 | 994 | ||
995 | /* Free the HW pgd, one page at a time */ | 995 | /* Free the HW pgd, one page at a time */ |
996 | if (pgd) | 996 | if (pgd) |
997 | free_pages_exact(pgd, S2_PGD_SIZE); | 997 | free_pages_exact(pgd, stage2_pgd_size(kvm)); |
998 | } | 998 | } |
999 | 999 | ||
1000 | static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, | 1000 | static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, |
@@ -1003,16 +1003,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache | |||
1003 | pgd_t *pgd; | 1003 | pgd_t *pgd; |
1004 | pud_t *pud; | 1004 | pud_t *pud; |
1005 | 1005 | ||
1006 | pgd = kvm->arch.pgd + stage2_pgd_index(addr); | 1006 | pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); |
1007 | if (WARN_ON(stage2_pgd_none(*pgd))) { | 1007 | if (stage2_pgd_none(kvm, *pgd)) { |
1008 | if (!cache) | 1008 | if (!cache) |
1009 | return NULL; | 1009 | return NULL; |
1010 | pud = mmu_memory_cache_alloc(cache); | 1010 | pud = mmu_memory_cache_alloc(cache); |
1011 | stage2_pgd_populate(pgd, pud); | 1011 | stage2_pgd_populate(kvm, pgd, pud); |
1012 | get_page(virt_to_page(pgd)); | 1012 | get_page(virt_to_page(pgd)); |
1013 | } | 1013 | } |
1014 | 1014 | ||
1015 | return stage2_pud_offset(pgd, addr); | 1015 | return stage2_pud_offset(kvm, pgd, addr); |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, | 1018 | static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, |
@@ -1025,15 +1025,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache | |||
1025 | if (!pud) | 1025 | if (!pud) |
1026 | return NULL; | 1026 | return NULL; |
1027 | 1027 | ||
1028 | if (stage2_pud_none(*pud)) { | 1028 | if (stage2_pud_none(kvm, *pud)) { |
1029 | if (!cache) | 1029 | if (!cache) |
1030 | return NULL; | 1030 | return NULL; |
1031 | pmd = mmu_memory_cache_alloc(cache); | 1031 | pmd = mmu_memory_cache_alloc(cache); |
1032 | stage2_pud_populate(pud, pmd); | 1032 | stage2_pud_populate(kvm, pud, pmd); |
1033 | get_page(virt_to_page(pud)); | 1033 | get_page(virt_to_page(pud)); |
1034 | } | 1034 | } |
1035 | 1035 | ||
1036 | return stage2_pmd_offset(pud, addr); | 1036 | return stage2_pmd_offset(kvm, pud, addr); |
1037 | } | 1037 | } |
1038 | 1038 | ||
1039 | static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache | 1039 | static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache |
@@ -1207,8 +1207,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, | |||
1207 | if (writable) | 1207 | if (writable) |
1208 | pte = kvm_s2pte_mkwrite(pte); | 1208 | pte = kvm_s2pte_mkwrite(pte); |
1209 | 1209 | ||
1210 | ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, | 1210 | ret = mmu_topup_memory_cache(&cache, |
1211 | KVM_NR_MEM_OBJS); | 1211 | kvm_mmu_cache_min_pages(kvm), |
1212 | KVM_NR_MEM_OBJS); | ||
1212 | if (ret) | 1213 | if (ret) |
1213 | goto out; | 1214 | goto out; |
1214 | spin_lock(&kvm->mmu_lock); | 1215 | spin_lock(&kvm->mmu_lock); |
@@ -1230,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) | |||
1230 | { | 1231 | { |
1231 | kvm_pfn_t pfn = *pfnp; | 1232 | kvm_pfn_t pfn = *pfnp; |
1232 | gfn_t gfn = *ipap >> PAGE_SHIFT; | 1233 | gfn_t gfn = *ipap >> PAGE_SHIFT; |
1234 | struct page *page = pfn_to_page(pfn); | ||
1233 | 1235 | ||
1234 | if (PageTransCompoundMap(pfn_to_page(pfn))) { | 1236 | /* |
1237 | * PageTransCompoungMap() returns true for THP and | ||
1238 | * hugetlbfs. Make sure the adjustment is done only for THP | ||
1239 | * pages. | ||
1240 | */ | ||
1241 | if (!PageHuge(page) && PageTransCompoundMap(page)) { | ||
1235 | unsigned long mask; | 1242 | unsigned long mask; |
1236 | /* | 1243 | /* |
1237 | * The address we faulted on is backed by a transparent huge | 1244 | * The address we faulted on is backed by a transparent huge |
@@ -1296,19 +1303,21 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) | |||
1296 | 1303 | ||
1297 | /** | 1304 | /** |
1298 | * stage2_wp_pmds - write protect PUD range | 1305 | * stage2_wp_pmds - write protect PUD range |
1306 | * @kvm: kvm instance for the VM | ||
1299 | * @pud: pointer to pud entry | 1307 | * @pud: pointer to pud entry |
1300 | * @addr: range start address | 1308 | * @addr: range start address |
1301 | * @end: range end address | 1309 | * @end: range end address |
1302 | */ | 1310 | */ |
1303 | static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) | 1311 | static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, |
1312 | phys_addr_t addr, phys_addr_t end) | ||
1304 | { | 1313 | { |
1305 | pmd_t *pmd; | 1314 | pmd_t *pmd; |
1306 | phys_addr_t next; | 1315 | phys_addr_t next; |
1307 | 1316 | ||
1308 | pmd = stage2_pmd_offset(pud, addr); | 1317 | pmd = stage2_pmd_offset(kvm, pud, addr); |
1309 | 1318 | ||
1310 | do { | 1319 | do { |
1311 | next = stage2_pmd_addr_end(addr, end); | 1320 | next = stage2_pmd_addr_end(kvm, addr, end); |
1312 | if (!pmd_none(*pmd)) { | 1321 | if (!pmd_none(*pmd)) { |
1313 | if (pmd_thp_or_huge(*pmd)) { | 1322 | if (pmd_thp_or_huge(*pmd)) { |
1314 | if (!kvm_s2pmd_readonly(pmd)) | 1323 | if (!kvm_s2pmd_readonly(pmd)) |
@@ -1328,18 +1337,19 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) | |||
1328 | * | 1337 | * |
1329 | * Process PUD entries, for a huge PUD we cause a panic. | 1338 | * Process PUD entries, for a huge PUD we cause a panic. |
1330 | */ | 1339 | */ |
1331 | static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) | 1340 | static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, |
1341 | phys_addr_t addr, phys_addr_t end) | ||
1332 | { | 1342 | { |
1333 | pud_t *pud; | 1343 | pud_t *pud; |
1334 | phys_addr_t next; | 1344 | phys_addr_t next; |
1335 | 1345 | ||
1336 | pud = stage2_pud_offset(pgd, addr); | 1346 | pud = stage2_pud_offset(kvm, pgd, addr); |
1337 | do { | 1347 | do { |
1338 | next = stage2_pud_addr_end(addr, end); | 1348 | next = stage2_pud_addr_end(kvm, addr, end); |
1339 | if (!stage2_pud_none(*pud)) { | 1349 | if (!stage2_pud_none(kvm, *pud)) { |
1340 | /* TODO:PUD not supported, revisit later if supported */ | 1350 | /* TODO:PUD not supported, revisit later if supported */ |
1341 | BUG_ON(stage2_pud_huge(*pud)); | 1351 | BUG_ON(stage2_pud_huge(kvm, *pud)); |
1342 | stage2_wp_pmds(pud, addr, next); | 1352 | stage2_wp_pmds(kvm, pud, addr, next); |
1343 | } | 1353 | } |
1344 | } while (pud++, addr = next, addr != end); | 1354 | } while (pud++, addr = next, addr != end); |
1345 | } | 1355 | } |
@@ -1355,7 +1365,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) | |||
1355 | pgd_t *pgd; | 1365 | pgd_t *pgd; |
1356 | phys_addr_t next; | 1366 | phys_addr_t next; |
1357 | 1367 | ||
1358 | pgd = kvm->arch.pgd + stage2_pgd_index(addr); | 1368 | pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); |
1359 | do { | 1369 | do { |
1360 | /* | 1370 | /* |
1361 | * Release kvm_mmu_lock periodically if the memory region is | 1371 | * Release kvm_mmu_lock periodically if the memory region is |
@@ -1369,9 +1379,9 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) | |||
1369 | cond_resched_lock(&kvm->mmu_lock); | 1379 | cond_resched_lock(&kvm->mmu_lock); |
1370 | if (!READ_ONCE(kvm->arch.pgd)) | 1380 | if (!READ_ONCE(kvm->arch.pgd)) |
1371 | break; | 1381 | break; |
1372 | next = stage2_pgd_addr_end(addr, end); | 1382 | next = stage2_pgd_addr_end(kvm, addr, end); |
1373 | if (stage2_pgd_present(*pgd)) | 1383 | if (stage2_pgd_present(kvm, *pgd)) |
1374 | stage2_wp_puds(pgd, addr, next); | 1384 | stage2_wp_puds(kvm, pgd, addr, next); |
1375 | } while (pgd++, addr = next, addr != end); | 1385 | } while (pgd++, addr = next, addr != end); |
1376 | } | 1386 | } |
1377 | 1387 | ||
@@ -1520,7 +1530,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1520 | up_read(¤t->mm->mmap_sem); | 1530 | up_read(¤t->mm->mmap_sem); |
1521 | 1531 | ||
1522 | /* We need minimum second+third level pages */ | 1532 | /* We need minimum second+third level pages */ |
1523 | ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, | 1533 | ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), |
1524 | KVM_NR_MEM_OBJS); | 1534 | KVM_NR_MEM_OBJS); |
1525 | if (ret) | 1535 | if (ret) |
1526 | return ret; | 1536 | return ret; |
@@ -1763,7 +1773,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1763 | } | 1773 | } |
1764 | 1774 | ||
1765 | /* Userspace should not be able to register out-of-bounds IPAs */ | 1775 | /* Userspace should not be able to register out-of-bounds IPAs */ |
1766 | VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); | 1776 | VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); |
1767 | 1777 | ||
1768 | if (fault_status == FSC_ACCESS) { | 1778 | if (fault_status == FSC_ACCESS) { |
1769 | handle_access_fault(vcpu, fault_ipa); | 1779 | handle_access_fault(vcpu, fault_ipa); |
@@ -2062,7 +2072,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
2062 | * space addressable by the KVM guest IPA space. | 2072 | * space addressable by the KVM guest IPA space. |
2063 | */ | 2073 | */ |
2064 | if (memslot->base_gfn + memslot->npages >= | 2074 | if (memslot->base_gfn + memslot->npages >= |
2065 | (KVM_PHYS_SIZE >> PAGE_SHIFT)) | 2075 | (kvm_phys_size(kvm) >> PAGE_SHIFT)) |
2066 | return -EFAULT; | 2076 | return -EFAULT; |
2067 | 2077 | ||
2068 | down_read(¤t->mm->mmap_sem); | 2078 | down_read(¤t->mm->mmap_sem); |
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 12502251727e..eb2a390a6c86 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c | |||
@@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, | |||
241 | list_for_each_entry(dev, &(its)->device_list, dev_list) \ | 241 | list_for_each_entry(dev, &(its)->device_list, dev_list) \ |
242 | list_for_each_entry(ite, &(dev)->itt_head, ite_list) | 242 | list_for_each_entry(ite, &(dev)->itt_head, ite_list) |
243 | 243 | ||
244 | /* | ||
245 | * We only implement 48 bits of PA at the moment, although the ITS | ||
246 | * supports more. Let's be restrictive here. | ||
247 | */ | ||
248 | #define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) | ||
249 | #define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) | ||
250 | |||
251 | #define GIC_LPI_OFFSET 8192 | 244 | #define GIC_LPI_OFFSET 8192 |
252 | 245 | ||
253 | #define VITS_TYPER_IDBITS 16 | 246 | #define VITS_TYPER_IDBITS 16 |
@@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
759 | { | 752 | { |
760 | int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; | 753 | int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; |
761 | u64 indirect_ptr, type = GITS_BASER_TYPE(baser); | 754 | u64 indirect_ptr, type = GITS_BASER_TYPE(baser); |
755 | phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); | ||
762 | int esz = GITS_BASER_ENTRY_SIZE(baser); | 756 | int esz = GITS_BASER_ENTRY_SIZE(baser); |
763 | int index; | 757 | int index; |
764 | gfn_t gfn; | 758 | gfn_t gfn; |
@@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
783 | if (id >= (l1_tbl_size / esz)) | 777 | if (id >= (l1_tbl_size / esz)) |
784 | return false; | 778 | return false; |
785 | 779 | ||
786 | addr = BASER_ADDRESS(baser) + id * esz; | 780 | addr = base + id * esz; |
787 | gfn = addr >> PAGE_SHIFT; | 781 | gfn = addr >> PAGE_SHIFT; |
788 | 782 | ||
789 | if (eaddr) | 783 | if (eaddr) |
@@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
798 | 792 | ||
799 | /* Each 1st level entry is represented by a 64-bit value. */ | 793 | /* Each 1st level entry is represented by a 64-bit value. */ |
800 | if (kvm_read_guest_lock(its->dev->kvm, | 794 | if (kvm_read_guest_lock(its->dev->kvm, |
801 | BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), | 795 | base + index * sizeof(indirect_ptr), |
802 | &indirect_ptr, sizeof(indirect_ptr))) | 796 | &indirect_ptr, sizeof(indirect_ptr))) |
803 | return false; | 797 | return false; |
804 | 798 | ||
@@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
808 | if (!(indirect_ptr & BIT_ULL(63))) | 802 | if (!(indirect_ptr & BIT_ULL(63))) |
809 | return false; | 803 | return false; |
810 | 804 | ||
811 | /* | 805 | /* Mask the guest physical address and calculate the frame number. */ |
812 | * Mask the guest physical address and calculate the frame number. | ||
813 | * Any address beyond our supported 48 bits of PA will be caught | ||
814 | * by the actual check in the final step. | ||
815 | */ | ||
816 | indirect_ptr &= GENMASK_ULL(51, 16); | 806 | indirect_ptr &= GENMASK_ULL(51, 16); |
817 | 807 | ||
818 | /* Find the address of the actual entry */ | 808 | /* Find the address of the actual entry */ |
@@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg) | |||
1304 | GITS_BASER_OUTER_CACHEABILITY_SHIFT, | 1294 | GITS_BASER_OUTER_CACHEABILITY_SHIFT, |
1305 | vgic_sanitise_outer_cacheability); | 1295 | vgic_sanitise_outer_cacheability); |
1306 | 1296 | ||
1307 | /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */ | ||
1308 | reg &= ~GENMASK_ULL(15, 12); | ||
1309 | |||
1310 | /* We support only one (ITS) page size: 64K */ | 1297 | /* We support only one (ITS) page size: 64K */ |
1311 | reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; | 1298 | reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; |
1312 | 1299 | ||
@@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg) | |||
1325 | GITS_CBASER_OUTER_CACHEABILITY_SHIFT, | 1312 | GITS_CBASER_OUTER_CACHEABILITY_SHIFT, |
1326 | vgic_sanitise_outer_cacheability); | 1313 | vgic_sanitise_outer_cacheability); |
1327 | 1314 | ||
1328 | /* | 1315 | /* Sanitise the physical address to be 64k aligned. */ |
1329 | * Sanitise the physical address to be 64k aligned. | 1316 | reg &= ~GENMASK_ULL(15, 12); |
1330 | * Also limit the physical addresses to 48 bits. | ||
1331 | */ | ||
1332 | reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); | ||
1333 | 1317 | ||
1334 | return reg; | 1318 | return reg; |
1335 | } | 1319 | } |
@@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) | |||
1375 | if (!its->enabled) | 1359 | if (!its->enabled) |
1376 | return; | 1360 | return; |
1377 | 1361 | ||
1378 | cbaser = CBASER_ADDRESS(its->cbaser); | 1362 | cbaser = GITS_CBASER_ADDRESS(its->cbaser); |
1379 | 1363 | ||
1380 | while (its->cwriter != its->creadr) { | 1364 | while (its->cwriter != its->creadr) { |
1381 | int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, | 1365 | int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, |
@@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) | |||
2233 | if (!(baser & GITS_BASER_VALID)) | 2217 | if (!(baser & GITS_BASER_VALID)) |
2234 | return 0; | 2218 | return 0; |
2235 | 2219 | ||
2236 | l1_gpa = BASER_ADDRESS(baser); | 2220 | l1_gpa = GITS_BASER_ADDR_48_to_52(baser); |
2237 | 2221 | ||
2238 | if (baser & GITS_BASER_INDIRECT) { | 2222 | if (baser & GITS_BASER_INDIRECT) { |
2239 | l1_esz = GITS_LVL1_ENTRY_SIZE; | 2223 | l1_esz = GITS_LVL1_ENTRY_SIZE; |
@@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) | |||
2305 | { | 2289 | { |
2306 | const struct vgic_its_abi *abi = vgic_its_get_abi(its); | 2290 | const struct vgic_its_abi *abi = vgic_its_get_abi(its); |
2307 | u64 baser = its->baser_coll_table; | 2291 | u64 baser = its->baser_coll_table; |
2308 | gpa_t gpa = BASER_ADDRESS(baser); | 2292 | gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser); |
2309 | struct its_collection *collection; | 2293 | struct its_collection *collection; |
2310 | u64 val; | 2294 | u64 val; |
2311 | size_t max_size, filled = 0; | 2295 | size_t max_size, filled = 0; |
@@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) | |||
2354 | if (!(baser & GITS_BASER_VALID)) | 2338 | if (!(baser & GITS_BASER_VALID)) |
2355 | return 0; | 2339 | return 0; |
2356 | 2340 | ||
2357 | gpa = BASER_ADDRESS(baser); | 2341 | gpa = GITS_BASER_ADDR_48_to_52(baser); |
2358 | 2342 | ||
2359 | max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; | 2343 | max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; |
2360 | 2344 | ||
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 6ada2432e37c..114dce9f4bf5 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c | |||
@@ -25,7 +25,7 @@ | |||
25 | int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, | 25 | int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, |
26 | phys_addr_t addr, phys_addr_t alignment) | 26 | phys_addr_t addr, phys_addr_t alignment) |
27 | { | 27 | { |
28 | if (addr & ~KVM_PHYS_MASK) | 28 | if (addr & ~kvm_phys_mask(kvm)) |
29 | return -E2BIG; | 29 | return -E2BIG; |
30 | 30 | ||
31 | if (!IS_ALIGNED(addr, alignment)) | 31 | if (!IS_ALIGNED(addr, alignment)) |
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a2a175b08b17..b3d1f0985117 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c | |||
@@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg) | |||
364 | vgic_sanitise_outer_cacheability); | 364 | vgic_sanitise_outer_cacheability); |
365 | 365 | ||
366 | reg &= ~PENDBASER_RES0_MASK; | 366 | reg &= ~PENDBASER_RES0_MASK; |
367 | reg &= ~GENMASK_ULL(51, 48); | ||
368 | 367 | ||
369 | return reg; | 368 | return reg; |
370 | } | 369 | } |
@@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg) | |||
382 | vgic_sanitise_outer_cacheability); | 381 | vgic_sanitise_outer_cacheability); |
383 | 382 | ||
384 | reg &= ~PROPBASER_RES0_MASK; | 383 | reg &= ~PROPBASER_RES0_MASK; |
385 | reg &= ~GENMASK_ULL(51, 48); | ||
386 | return reg; | 384 | return reg; |
387 | } | 385 | } |
388 | 386 | ||