author     Paul Mackerras <paulus@ozlabs.org>  2017-01-31 03:21:26 -0500
committer  Paul Mackerras <paulus@ozlabs.org>  2017-01-31 03:21:26 -0500
commit     167c76e05591c2b656c0f329282f453dd46f4ea5 (patch)
tree       25a0af78d28cbec1decab6ea31360882a31426d1
parent     fcd4f3c6d150357a02af8526e69bfebb82dd5d46 (diff)
parent     8cf4ecc0ca9bd9bdc9b4ca0a99f7445a1e74afed (diff)
Merge remote-tracking branch 'remotes/powerpc/topic/ppc-kvm' into kvm-ppc-next
This merges in the POWER9 radix MMU host and guest support, which
was put into a topic branch because it touches both powerpc and
KVM code.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
33 files changed, 1585 insertions(+), 183 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 03145b7cafaa..4470671b0c26 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3201,6 +3201,71 @@ struct kvm_reinject_control {
 pit_reinject = 0 (!reinject mode) is recommended, unless running an old
 operating system that uses the PIT for timing (e.g. Linux 2.4.x).
 
+4.99 KVM_PPC_CONFIGURE_V3_MMU
+
+Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_mmuv3_cfg (in)
+Returns: 0 on success,
+         -EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
+         -EINVAL if the configuration is invalid
+
+This ioctl controls whether the guest will use radix or HPT (hashed
+page table) translation, and sets the pointer to the process table for
+the guest.
+
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;
+};
+
+There are two bits that can be set in flags; KVM_PPC_MMUV3_RADIX and
+KVM_PPC_MMUV3_GTSE.  KVM_PPC_MMUV3_RADIX, if set, configures the guest
+to use radix tree translation, and if clear, to use HPT translation.
+KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
+to be able to use the global TLB and SLB invalidation instructions;
+if clear, the guest may not use these instructions.
+
+The process_table field specifies the address and size of the guest
+process table, which is in the guest's space.  This field is formatted
+as the second doubleword of the partition table entry, as defined in
+the Power ISA V3.00, Book III section 5.7.6.1.
+
+4.100 KVM_PPC_GET_RMMU_INFO
+
+Capability: KVM_CAP_PPC_RADIX_MMU
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_rmmu_info (out)
+Returns: 0 on success,
+         -EFAULT if struct kvm_ppc_rmmu_info cannot be written,
+         -EINVAL if no useful information can be returned
+
+This ioctl returns a structure containing two things: (a) a list
+containing supported radix tree geometries, and (b) a list that maps
+page sizes to put in the "AP" (actual page size) field for the tlbie
+(TLB invalidate entry) instruction.
+
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
+The geometries[] field gives up to 8 supported geometries for the
+radix page table, in terms of the log base 2 of the smallest page
+size, and the number of bits indexed at each level of the tree, from
+the PTE level up to the PGD level in that order.  Any unused entries
+will have 0 in the page_shift field.
+
+The ap_encodings gives the supported page sizes and their AP field
+encodings, encoded with the AP value in the top 3 bits and the log
+base 2 of the page size in the bottom 6 bits.
+
 5. The kvm_run structure
 ------------------------
 
@@ -3942,3 +4007,21 @@ In order to use SynIC, it has to be activated by setting this
 capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
 will disable the use of APIC hardware virtualization even if supported
 by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+
+8.3 KVM_CAP_PPC_RADIX_MMU
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that the kernel can support guests using the
+radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
+processor).
+
+8.4 KVM_CAP_PPC_HASH_MMU_V3
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that the kernel can support guests using the
+hashed page table MMU defined in Power ISA V3.00 (as implemented in
+the POWER9 processor), including in-memory segment tables.
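
A minimal userspace sketch of the two ioctls documented above (illustrative only, not part of this commit; vm_fd is assumed to be an open KVM VM descriptor, and error handling is kept minimal):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Query radix geometry support, then switch the guest to radix
     * translation with global TLB invalidations (GTSE) enabled. */
    static int configure_radix(int vm_fd, __u64 process_table)
    {
            struct kvm_ppc_rmmu_info info;
            struct kvm_ppc_mmuv3_cfg cfg = {
                    .flags = KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE,
                    /* formatted as the second doubleword of the partition
                     * table entry (Power ISA V3.00 Book III 5.7.6.1) */
                    .process_table = process_table,
            };

            if (ioctl(vm_fd, KVM_PPC_GET_RMMU_INFO, &info) < 0 ||
                ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg) < 0) {
                    perror("radix MMU setup");
                    return -1;
            }
            return 0;
    }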
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 8afb0e00f7d9..d73e9dfa5237 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -44,10 +44,20 @@ struct patb_entry {
 };
 extern struct patb_entry *partition_tb;
 
+/* Bits in patb0 field */
 #define PATB_HR		(1UL << 63)
-#define PATB_GR		(1UL << 63)
 #define RPDB_MASK	0x0ffffffffffff00fUL
 #define RPDB_SHIFT	(1UL << 8)
+#define RTS1_SHIFT	61		/* top 2 bits of radix tree size */
+#define RTS1_MASK	(3UL << RTS1_SHIFT)
+#define RTS2_SHIFT	5		/* bottom 3 bits of radix tree size */
+#define RTS2_MASK	(7UL << RTS2_SHIFT)
+#define RPDS_MASK	0x1f		/* root page dir. size field */
+
+/* Bits in patb1 field */
+#define PATB_GR		(1UL << 63)	/* guest uses radix; must match HR */
+#define PRTS_MASK	0x1f		/* process table size field */
+
 /*
  * Limit process table to PAGE_SIZE table. This
  * also limit the max pid we can support.
@@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 extern int (*register_process_table)(unsigned long base, unsigned long page_size,
 				     unsigned long tbl_size);
 
+#ifdef CONFIG_PPC_PSERIES
+extern void radix_init_pseries(void);
+#else
+static inline void radix_init_pseries(void) { };
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
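
The masks above split the radix tree size (RTS) across two bit ranges of patb0. A hedged sketch of how an entry might be composed (an illustration inferred from the masks, not code from this commit; make_patb0 is a hypothetical helper):

    /* Compose patb0: host-radix bit, root page dir base, split RTS, RPDS. */
    static inline unsigned long make_patb0(unsigned long root_pgd,
                                           unsigned long rts,
                                           unsigned long rpds)
    {
            return PATB_HR |                      /* host uses radix */
                   (root_pgd & RPDB_MASK) |       /* root page dir base */
                   ((rts >> 3) << RTS1_SHIFT) |   /* top 2 bits of RTS */
                   ((rts & 7) << RTS2_SHIFT) |    /* bottom 3 bits of RTS */
                   (rpds & RPDS_MASK);            /* root page dir size */
    }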
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a3eee661297..8fa09fa500f0 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -97,6 +97,15 @@
 	ld	reg,PACAKBASE(r13);					\
 	ori	reg,reg,(ABS_ADDR(label))@l;
 
+/*
+ * Branches from unrelocated code (e.g., interrupts) to labels outside
+ * head-y require >64K offsets.
+ */
+#define __LOAD_FAR_HANDLER(reg, label)					\
+	ld	reg,PACAKBASE(r13);					\
+	ori	reg,reg,(ABS_ADDR(label))@l;				\
+	addis	reg,reg,(ABS_ADDR(label))@h;
+
 /* Exception register prefixes */
 #define EXC_HV	H
 #define EXC_STD
@@ -227,13 +236,41 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtctr	reg;							\
 	bctr
 
+/*
+ * KVM requires __LOAD_FAR_HANDLER.
+ *
+ * __BRANCH_TO_KVM_EXIT branches are also a special case because they
+ * explicitly use r9 then reload it from PACA before branching. Hence
+ * the double-underscore.
+ */
+#define __BRANCH_TO_KVM_EXIT(area, label)				\
+	mfctr	r9;							\
+	std	r9,HSTATE_SCRATCH1(r13);				\
+	__LOAD_FAR_HANDLER(r9, label);					\
+	mtctr	r9;							\
+	ld	r9,area+EX_R9(r13);					\
+	bctr
+
+#define BRANCH_TO_KVM(reg, label)					\
+	__LOAD_FAR_HANDLER(reg, label);					\
+	mtctr	reg;							\
+	bctr
+
 #else
 #define BRANCH_TO_COMMON(reg, label)					\
 	b	label
 
+#define BRANCH_TO_KVM(reg, label)					\
+	b	label
+
+#define __BRANCH_TO_KVM_EXIT(area, label)				\
+	ld	r9,area+EX_R9(r13);					\
+	b	label
+
 #endif
 
-#define __KVM_HANDLER_PROLOG(area, n)					\
+
+#define __KVM_HANDLER(area, h, n)					\
 BEGIN_FTR_SECTION_NESTED(947)						\
 	ld	r10,area+EX_CFAR(r13);					\
 	std	r10,HSTATE_CFAR(r13);					\
@@ -243,30 +280,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	std	r10,HSTATE_PPR(r13);					\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);		\
 	ld	r10,area+EX_R10(r13);					\
-	stw	r9,HSTATE_SCRATCH1(r13);				\
-	ld	r9,area+EX_R9(r13);					\
 	std	r12,HSTATE_SCRATCH0(r13);				\
-
-#define __KVM_HANDLER(area, h, n)					\
-	__KVM_HANDLER_PROLOG(area, n)					\
-	li	r12,n;							\
-	b	kvmppc_interrupt
+	sldi	r12,r9,32;						\
+	ori	r12,r12,(n);						\
+	/* This reloads r9 before branching to kvmppc_interrupt */	\
+	__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
 
 #define __KVM_HANDLER_SKIP(area, h, n)					\
 	cmpwi	r10,KVM_GUEST_MODE_SKIP;				\
-	ld	r10,area+EX_R10(r13);					\
 	beq	89f;							\
-	stw	r9,HSTATE_SCRATCH1(r13);				\
 BEGIN_FTR_SECTION_NESTED(948)						\
-	ld	r9,area+EX_PPR(r13);					\
-	std	r9,HSTATE_PPR(r13);					\
+	ld	r10,area+EX_PPR(r13);					\
+	std	r10,HSTATE_PPR(r13);					\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);		\
-	ld	r9,area+EX_R9(r13);					\
+	ld	r10,area+EX_R10(r13);					\
 	std	r12,HSTATE_SCRATCH0(r13);				\
-	li	r12,n;							\
-	b	kvmppc_interrupt;					\
+	sldi	r12,r9,32;						\
+	ori	r12,r12,(n);						\
+	/* This reloads r9 before branching to kvmppc_interrupt */	\
+	__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt);			\
 89:	mtocrf	0x80,r9;						\
 	ld	r9,area+EX_R9(r13);					\
+	ld	r10,area+EX_R10(r13);					\
 	b	kvmppc_skip_##h##interrupt
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
@@ -393,12 +428,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
 
 #define STD_RELON_EXCEPTION_HV(loc, vec, label)		\
-	/* No guest interrupts come through here */	\
 	SET_SCRATCH0(r13);	/* save r13 */		\
-	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec);
+	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label,	\
+				       EXC_HV, KVMTEST_HV, vec);
 
 #define STD_RELON_EXCEPTION_HV_OOL(vec, label)			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec);		\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec);	\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /* This associate vector numbers with bits in paca->irq_happened */
@@ -475,10 +510,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label)			\
 	_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,			\
-					  EXC_HV, SOFTEN_NOTEST_HV)
+					  EXC_HV, SOFTEN_TEST_HV)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label)			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec);		\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec);		\
 	EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
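
What __LOAD_FAR_HANDLER computes, expressed as C arithmetic (a sketch for illustration only; it assumes the kernel base loaded from PACAKBASE has its low 16 bits clear, so the ori cannot collide with existing bits):

    /* ori fills the low 16 bits of the offset; addis then adds the high
     * 16 bits shifted left, so a full 32-bit offset from the kernel base
     * is reachable, not just the 64K that a single ori allows. */
    static unsigned long load_far_handler(unsigned long kbase,
                                          unsigned long abs_addr)
    {
            unsigned long reg = kbase;

            reg |= abs_addr & 0xffffUL;        /* ori   reg,reg,label@l */
            reg += abs_addr & 0xffff0000UL;    /* addis reg,reg,label@h */
            return reg;
    }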
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index fca7033839a9..9bd81619d090 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -218,7 +218,7 @@ name:
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #define TRAMP_KVM_BEGIN(name)					\
-	TRAMP_REAL_BEGIN(name)
+	TRAMP_VIRT_BEGIN(name)
 #else
 #define TRAMP_KVM_BEGIN(name)
 #endif
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 77ff1ba99d1f..54d11b3a6bf7 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -276,6 +276,7 @@
 #define H_GET_MPP_X		0x314
 #define H_SET_MODE		0x31C
 #define H_CLEAR_HPT		0x358
+#define H_REGISTER_PROC_TBL	0x37C
 #define H_SIGNAL_SYS_RESET	0x380
 #define MAX_HCALL_OPCODE	H_SIGNAL_SYS_RESET
 
@@ -313,6 +314,16 @@
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2
 /* >= 0 values are CPU number */
 
+/* Flag values used in H_REGISTER_PROC_TBL hcall */
+#define PROC_TABLE_OP_MASK	0x18
+#define PROC_TABLE_DEREG	0x10
+#define PROC_TABLE_NEW		0x18
+#define PROC_TABLE_TYPE_MASK	0x06
+#define PROC_TABLE_HPT_SLB	0x00
+#define PROC_TABLE_HPT_PT	0x02
+#define PROC_TABLE_RADIX	0x04
+#define PROC_TABLE_GTSE		0x01
+
 #ifndef __ASSEMBLY__
 
 /**
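
How a pseries guest might combine the new flag values when registering a radix process table (a sketch under stated assumptions, not code from this commit; hcall() is a hypothetical stand-in for the real plpar hcall wrappers, and the argument order is an assumption for illustration):

    /* Register a new radix process table with guest translation
     * shootdown (GTSE) enabled. */
    static long register_radix_proc_table(unsigned long base,
                                          unsigned long size)
    {
            unsigned long flags = PROC_TABLE_NEW | PROC_TABLE_RADIX |
                                  PROC_TABLE_GTSE;

            return hcall(H_REGISTER_PROC_TBL, flags, base, 0, size);
    }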
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 5cf306ae0ac3..2bf35017ffc0 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
 			unsigned long status);
 extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
 			unsigned long slb_v, unsigned long valid);
+extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			unsigned long gpa, gva_t ea, int is_store);
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
@@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void);
 extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
 
+extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
+			struct kvm_vcpu *vcpu,
+			unsigned long ea, unsigned long dsisr);
+extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern int kvmppc_init_vm_radix(struct kvm *kvm);
+extern void kvmppc_free_radix(struct kvm *kvm);
+extern int kvmppc_radix_init(void);
+extern void kvmppc_radix_exit(void);
+extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn);
+extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn);
+extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn);
+extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map);
+extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+
 /* XXX remove this export when load_last_inst() is generic */
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
@@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 			unsigned long pte_index, unsigned long avpn,
 			unsigned long *hpret);
-extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
+extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			struct kvm_memory_slot *memslot,
+			unsigned long *map);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
 			unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 848292176908..0db010cc4e65 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -36,6 +36,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+static inline bool kvm_is_radix(struct kvm *kvm)
+{
+	return kvm->arch.radix;
+}
+
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e59b172666cd..b2dbeac3f450 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -263,7 +263,11 @@ struct kvm_arch {
 	unsigned long hpt_mask;
 	atomic_t hpte_mod_interest;
 	cpumask_t need_tlb_flush;
+	cpumask_t cpu_in_guest;
 	int hpt_cma_alloc;
+	u8 radix;
+	pgd_t *pgtable;
+	u64 process_table;
 	struct dentry *debugfs_dir;
 	struct dentry *htab_dentry;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -603,6 +607,7 @@ struct kvm_vcpu_arch {
 	ulong fault_dar;
 	u32 fault_dsisr;
 	unsigned long intr_msr;
+	ulong fault_gpa;	/* guest real address of page fault (POWER9) */
 #endif
 
 #ifdef CONFIG_BOOKE
@@ -657,6 +662,7 @@ struct kvm_vcpu_arch {
 	int state;
 	int ptid;
 	int thread_cpu;
+	int prev_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2da67bf1f2ec..48c760f89590 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -291,6 +291,8 @@ struct kvmppc_ops {
 			       struct irq_bypass_producer *);
 	void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
 					struct irq_bypass_producer *);
+	int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
+	int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 5e57705b4759..8af2546ea593 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -121,6 +121,8 @@ struct of_drconf_cell {
 #define OV1_PPC_2_06		0x02	/* set if we support PowerPC 2.06 */
 #define OV1_PPC_2_07		0x01	/* set if we support PowerPC 2.07 */
 
+#define OV1_PPC_3_00		0x80	/* set if we support PowerPC 3.00 */
+
 /* Option vector 2: Open Firmware options supported */
 #define OV2_REAL_MODE		0x20	/* set if we want OF in real mode */
 
@@ -151,10 +153,17 @@ struct of_drconf_cell {
 #define OV5_XCMO		0x0440	/* Page Coalescing */
 #define OV5_TYPE1_AFFINITY	0x0580	/* Type 1 NUMA affinity */
 #define OV5_PRRN		0x0540	/* Platform Resource Reassignment */
-#define OV5_PFO_HW_RNG		0x0E80	/* PFO Random Number Generator */
-#define OV5_PFO_HW_842		0x0E40	/* PFO Compression Accelerator */
-#define OV5_PFO_HW_ENCR		0x0E20	/* PFO Encryption Accelerator */
-#define OV5_SUB_PROCESSORS	0x0F01	/* 1,2,or 4 Sub-Processors supported */
+#define OV5_PFO_HW_RNG		0x1180	/* PFO Random Number Generator */
+#define OV5_PFO_HW_842		0x1140	/* PFO Compression Accelerator */
+#define OV5_PFO_HW_ENCR		0x1120	/* PFO Encryption Accelerator */
+#define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-Processors supported */
+#define OV5_XIVE_EXPLOIT	0x1701	/* XIVE exploitation supported */
+#define OV5_MMU_RADIX_300	0x1880	/* ISA v3.00 radix MMU supported */
+#define OV5_MMU_HASH_300	0x1840	/* ISA v3.00 hash MMU supported */
+#define OV5_MMU_SEGM_RADIX	0x1820	/* radix mode (no segmentation) */
+#define OV5_MMU_PROC_TBL	0x1810	/* hcall selects SLB or proc table */
+#define OV5_MMU_SLB		0x1800	/* always use SLB */
+#define OV5_MMU_GTSE		0x1808	/* Guest translation shootdown */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX		0x02	/* Linux is our OS */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 0d4531aa2052..aa44a83ad3ec 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -274,10 +274,14 @@
 #define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
 #define   DSISR_NOHPTE		0x40000000	/* no translation found */
 #define   DSISR_PROTFAULT	0x08000000	/* protection fault */
+#define   DSISR_BADACCESS	0x04000000	/* bad access to CI or G */
 #define   DSISR_ISSTORE		0x02000000	/* access was a store */
 #define   DSISR_DABRMATCH	0x00400000	/* hit data breakpoint */
 #define   DSISR_NOSEGMENT	0x00200000	/* SLB miss */
 #define   DSISR_KEYFAULT	0x00200000	/* Key fault */
+#define   DSISR_UNSUPP_MMU	0x00080000	/* Unsupported MMU config */
+#define   DSISR_SET_RC		0x00040000	/* Failed setting of R/C bits */
+#define   DSISR_PGDIRFAULT	0x00020000	/* Fault on page directory */
 #define SPRN_TBRL	0x10C	/* Time Base Read Lower Register (user, R/O) */
 #define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
 #define SPRN_CIR	0x11B	/* Chip Information Register (hyper, R/0) */
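
A sketch of how a fault path might classify the new DSISR bits (illustrative only; the dispatch below is an assumption for this example, not this commit's actual handler):

    /* Rough classification of a radix guest fault by DSISR bits. */
    static const char *dsisr_reason(u32 dsisr)
    {
            if (dsisr & DSISR_UNSUPP_MMU)
                    return "unsupported MMU configuration";
            if (dsisr & DSISR_SET_RC)
                    return "failed to set reference/change bits";
            if (dsisr & DSISR_PGDIRFAULT)
                    return "fault on page directory";
            if (dsisr & DSISR_BADACCESS)
                    return "bad access to cache-inhibited/guarded page";
            return "other";
    }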
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e3db3a50127b..4edbe4bb0e8b 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };
 
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;	/* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX	1	/* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE	2	/* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
 
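
A sketch of decoding one ap_encodings entry, following the layout described in the api.txt hunk above (the helper name is hypothetical):

    /* AP value lives in the top 3 bits of the 32-bit word, the log2 of
     * the page size in the bottom 6 bits. */
    static void decode_ap_encoding(unsigned int enc, unsigned int *ap,
                                   unsigned int *page_shift)
    {
            *ap = enc >> 29;            /* top 3 bits */
            *page_shift = enc & 0x3f;   /* log base 2 of the page size */
    }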
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0601e6a7297c..3afa0ad9837f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -498,6 +498,7 @@ int main(void)
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
+	DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix));
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
@@ -537,6 +538,7 @@ int main(void)
 	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
 	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
 	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+	DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa));
 	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d39d6118c6e9..34a04a5fa468 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r0,0
 	beq	1f
-	b	kvm_start_guest
+	BRANCH_TO_KVM(r10, kvm_start_guest)
 1:
 #endif
 
@@ -717,13 +717,9 @@ hardware_interrupt_hv:
 	BEGIN_FTR_SECTION
 		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
 					    EXC_HV, SOFTEN_TEST_HV)
-do_kvm_H0x500:
-		KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
 	FTR_SECTION_ELSE
 		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
 					    EXC_STD, SOFTEN_TEST_PR)
-do_kvm_0x500:
-		KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 EXC_REAL_END(hardware_interrupt, 0x500, 0x600)
 
@@ -737,6 +733,8 @@ hardware_interrupt_relon_hv:
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600)
 
+TRAMP_KVM(PACA_EXGEN, 0x500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x500)
 EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
 
 
@@ -832,6 +830,31 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00)
 TRAMP_KVM(PACA_EXGEN, 0xb00)
 EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+/*
+ * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
+ * that support it) before changing to HMT_MEDIUM. That allows the KVM
+ * code to save that value into the guest state (it is the guest's PPR
+ * value). Otherwise just change to HMT_MEDIUM as userspace has
+ * already saved the PPR.
+ */
+#define SYSCALL_KVMTEST							\
+	SET_SCRATCH0(r13);						\
+	GET_PACA(r13);							\
+	std	r9,PACA_EXGEN+EX_R9(r13);				\
+	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);			\
+	HMT_MEDIUM;							\
+	std	r10,PACA_EXGEN+EX_R10(r13);				\
+	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);	\
+	mfcr	r9;							\
+	KVMTEST_PR(0xc00);						\
+	GET_SCRATCH0(r13)
+
+#else
+#define SYSCALL_KVMTEST							\
+	HMT_MEDIUM
+#endif
+
 #define LOAD_SYSCALL_HANDLER(reg)					\
 	__LOAD_HANDLER(reg, system_call_common)
 
@@ -885,34 +908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
 #endif
 
 EXC_REAL_BEGIN(system_call, 0xc00, 0xd00)
-	/*
-	 * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
-	 * that support it) before changing to HMT_MEDIUM. That allows the KVM
-	 * code to save that value into the guest state (it is the guest's PPR
-	 * value). Otherwise just change to HMT_MEDIUM as userspace has
-	 * already saved the PPR.
-	 */
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9,PACA_EXGEN+EX_R9(r13)
-	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
-	HMT_MEDIUM;
-	std	r10,PACA_EXGEN+EX_R10(r13)
-	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
-	mfcr	r9
-	KVMTEST_PR(0xc00)
-	GET_SCRATCH0(r13)
-#else
-	HMT_MEDIUM;
-#endif
+	SYSCALL_KVMTEST
 	SYSCALL_PSERIES_1
 	SYSCALL_PSERIES_2_RFID
 	SYSCALL_PSERIES_3
 EXC_REAL_END(system_call, 0xc00, 0xd00)
 
 EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00)
-	HMT_MEDIUM
+	SYSCALL_KVMTEST
 	SYSCALL_PSERIES_1
 	SYSCALL_PSERIES_2_DIRECT
 	SYSCALL_PSERIES_3
@@ -927,7 +930,7 @@ TRAMP_KVM(PACA_EXGEN, 0xd00)
 EXC_COMMON(single_step_common, 0xd00, single_step_exception)
 
 EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20)
-EXC_VIRT_NONE(0x4e00, 0x4e20)
+EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x4e20, 0xe00)
 TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
 EXC_COMMON_BEGIN(h_data_storage_common)
 	mfspr	r10,SPRN_HDAR
@@ -943,7 +946,7 @@ EXC_COMMON_BEGIN(h_data_storage_common)
 
 
 EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40)
-EXC_VIRT_NONE(0x4e20, 0x4e40)
+EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x4e40, 0xe20)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
 EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ec47a939cbdd..358d43f8f84f 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void)
 struct option_vector1 {
 	u8 byte1;
 	u8 arch_versions;
+	u8 arch_versions3;
 } __packed;
 
 struct option_vector2 {
@@ -691,6 +692,9 @@ struct option_vector5 {
 	u8 reserved2;
 	__be16 reserved3;
 	u8 subprocessors;
+	u8 byte22;
+	u8 intarch;
+	u8 mmu;
 } __packed;
 
 struct option_vector6 {
@@ -700,7 +704,7 @@ struct option_vector6 {
 } __packed;
 
 struct ibm_arch_vec {
-	struct { u32 mask, val; } pvrs[10];
+	struct { u32 mask, val; } pvrs[12];
 
 	u8 num_vectors;
 
@@ -750,6 +754,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
 		.val  = cpu_to_be32(0x004d0000),
 	},
 	{
+		.mask = cpu_to_be32(0xffff0000), /* POWER9 */
+		.val  = cpu_to_be32(0x004e0000),
+	},
+	{
+		.mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
+		.val  = cpu_to_be32(0x0f000005),
+	},
+	{
 		.mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
 		.val  = cpu_to_be32(0x0f000004),
 	},
@@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
 		.byte1 = 0,
 		.arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
 				 OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+		.arch_versions3 = OV1_PPC_3_00,
 	},
 
 	.vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
@@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
 		.reserved2 = 0,
 		.reserved3 = 0,
 		.subprocessors = 1,
+		.intarch = 0,
+		.mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
+			OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
 	},
 
 	/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 7dd89b79d038..b87ccde2137a 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -70,7 +70,8 @@ endif
 kvm-hv-y += \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
-	book3s_64_mmu_hv.o
+	book3s_64_mmu_hv.o \
+	book3s_64_mmu_radix.o
 
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 	book3s_hv_rm_xics.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 019f008775b9..b6b5c185bd92 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
 	kvmppc_set_dsisr(vcpu, flags);
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage);	/* used by kvm_hv */
 
 void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
 {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b795dd1ac2ef..9df3d940acec 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -119,6 +119,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 	long err = -EBUSY;
 	long order;
 
+	if (kvm_is_radix(kvm))
+		return -EINVAL;
+
 	mutex_lock(&kvm->lock);
 	if (kvm->arch.hpte_setup_done) {
 		kvm->arch.hpte_setup_done = 0;
@@ -152,12 +155,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 
 void kvmppc_free_hpt(struct kvm *kvm)
 {
-	kvmppc_free_lpid(kvm->arch.lpid);
 	vfree(kvm->arch.revmap);
 	if (kvm->arch.hpt_cma_alloc)
 		kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
 				1 << (kvm->arch.hpt_order - PAGE_SHIFT));
-	else
+	else if (kvm->arch.hpt_virt)
 		free_pages(kvm->arch.hpt_virt,
 			   kvm->arch.hpt_order - PAGE_SHIFT);
 }
@@ -392,8 +394,8 @@ static int instruction_is_store(unsigned int instr)
 	return (instr & mask) != 0;
 }
 
-static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
-				  unsigned long gpa, gva_t ea, int is_store)
+int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			   unsigned long gpa, gva_t ea, int is_store)
 {
 	u32 last_inst;
 
@@ -458,6 +460,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long rcbits;
 	long mmio_update;
 
+	if (kvm_is_radix(kvm))
+		return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
+
 	/*
 	 * Real-mode code has already searched the HPT and found the
 	 * entry we're interested in.  Lock the entry and check that
@@ -695,12 +700,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
 
+typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			      unsigned long gfn);
+
 static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long start,
 				unsigned long end,
-				int (*handler)(struct kvm *kvm,
-					       unsigned long *rmapp,
-					       unsigned long gfn))
+				hva_handler_fn handler)
 {
 	int ret;
 	int retval = 0;
@@ -725,9 +731,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
 		for (; gfn < gfn_end; ++gfn) {
-			gfn_t gfn_offset = gfn - memslot->base_gfn;
-
-			ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
+			ret = handler(kvm, memslot, gfn);
 			retval |= ret;
 		}
 	}
@@ -736,20 +740,21 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 }
 
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-					 unsigned long gfn))
+			  hva_handler_fn handler)
 {
 	return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			   unsigned long gfn)
 {
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long h, i, j;
 	__be64 *hptep;
 	unsigned long ptel, psize, rcbits;
+	unsigned long *rmapp;
 
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
 	for (;;) {
 		lock_rmap(rmapp);
 		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
@@ -810,26 +815,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	hva_handler_fn handler;
+
+	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
+	kvm_handle_hva(kvm, hva, handler);
 	return 0;
 }
 
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+	hva_handler_fn handler;
+
+	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
+	kvm_handle_hva_range(kvm, start, end, handler);
 	return 0;
 }
 
 void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 				  struct kvm_memory_slot *memslot)
 {
-	unsigned long *rmapp;
 	unsigned long gfn;
 	unsigned long n;
+	unsigned long *rmapp;
 
-	rmapp = memslot->arch.rmap;
 	gfn = memslot->base_gfn;
-	for (n = memslot->npages; n; --n) {
+	rmapp = memslot->arch.rmap;
+	for (n = memslot->npages; n; --n, ++gfn) {
+		if (kvm_is_radix(kvm)) {
+			kvm_unmap_radix(kvm, memslot, gfn);
+			continue;
+		}
 		/*
 		 * Testing the present bit without locking is OK because
 		 * the memslot has been marked invalid already, and hence
@@ -837,20 +852,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 		 * thus the present bit can't go from 0 to 1.
 		 */
 		if (*rmapp & KVMPPC_RMAP_PRESENT)
-			kvm_unmap_rmapp(kvm, rmapp, gfn);
+			kvm_unmap_rmapp(kvm, memslot, gfn);
 		++rmapp;
-		++gfn;
 	}
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			 unsigned long gfn)
 {
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long head, i, j;
 	__be64 *hptep;
 	int ret = 0;
+	unsigned long *rmapp;
 
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
 retry:
 	lock_rmap(rmapp);
 	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
@@ -898,17 +914,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
+	hva_handler_fn handler;
+
+	handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
+	return kvm_handle_hva_range(kvm, start, end, handler);
 }
 
-static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			      unsigned long gfn)
 {
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long head, i, j;
 	unsigned long *hp;
 	int ret = 1;
+	unsigned long *rmapp;
 
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
 	if (*rmapp & KVMPPC_RMAP_REFERENCED)
 		return 1;
 
@@ -934,12 +955,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+	hva_handler_fn handler;
+
+	handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
+	return kvm_handle_hva(kvm, hva, handler);
 }
 
 void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	hva_handler_fn handler;
+
+	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
+	kvm_handle_hva(kvm, hva, handler);
 }
 
 static int vcpus_running(struct kvm *kvm)
@@ -1040,7 +1067,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 	return npages_dirty;
 }
 
-static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
 			      struct kvm_memory_slot *memslot,
 			      unsigned long *map)
 {
@@ -1058,12 +1085,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
 		__set_bit_le(gfn - memslot->base_gfn, map);
 }
 
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
-			     unsigned long *map)
+long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map)
 {
 	unsigned long i, j;
 	unsigned long *rmapp;
-	struct kvm_vcpu *vcpu;
 
 	preempt_disable();
 	rmapp = memslot->arch.rmap;
@@ -1079,15 +1105,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			__set_bit_le(j, map);
 		++rmapp;
 	}
-
-	/* Harvest dirty bits from VPA and DTL updates */
-	/* Note: we never modify the SLB shadow buffer areas */
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		spin_lock(&vcpu->arch.vpa_update_lock);
-		harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
-		harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
-		spin_unlock(&vcpu->arch.vpa_update_lock);
-	}
 	preempt_enable();
 	return 0;
 }
@@ -1142,10 +1159,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (memslot) {
-		rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-		lock_rmap(rmap);
-		*rmap |= KVMPPC_RMAP_CHANGED;
-		unlock_rmap(rmap);
+		if (!kvm_is_radix(kvm)) {
+			rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+			lock_rmap(rmap);
+			*rmap |= KVMPPC_RMAP_CHANGED;
+			unlock_rmap(rmap);
+		} else if (memslot->dirty_bitmap) {
+			mark_page_dirty(kvm, gfn);
+		}
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
@@ -1675,7 +1696,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */
 
-	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
+	if (kvm_is_radix(vcpu->kvm))
+		mmu->xlate = kvmppc_mmu_radix_xlate;
+	else
+		mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
 	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
 
 	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
@@ -0,0 +1,716 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/kvm.h> | ||
12 | #include <linux/kvm_host.h> | ||
13 | |||
14 | #include <asm/kvm_ppc.h> | ||
15 | #include <asm/kvm_book3s.h> | ||
16 | #include <asm/page.h> | ||
17 | #include <asm/mmu.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/pgalloc.h> | ||
20 | |||
21 | /* | ||
22 | * Supported radix tree geometry. | ||
23 | * Like p9, we support either 5 or 9 bits at the first (lowest) level, | ||
24 | * for a page size of 64k or 4k. | ||
25 | */ | ||
26 | static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; | ||
27 | |||
28 | int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | ||
29 | struct kvmppc_pte *gpte, bool data, bool iswrite) | ||
30 | { | ||
31 | struct kvm *kvm = vcpu->kvm; | ||
32 | u32 pid; | ||
33 | int ret, level, ps; | ||
34 | __be64 prte, rpte; | ||
35 | unsigned long root, pte, index; | ||
36 | unsigned long rts, bits, offset; | ||
37 | unsigned long gpa; | ||
38 | unsigned long proc_tbl_size; | ||
39 | |||
40 | /* Work out effective PID */ | ||
41 | switch (eaddr >> 62) { | ||
42 | case 0: | ||
43 | pid = vcpu->arch.pid; | ||
44 | break; | ||
45 | case 3: | ||
46 | pid = 0; | ||
47 | break; | ||
48 | default: | ||
49 | return -EINVAL; | ||
50 | } | ||
51 | proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12); | ||
52 | if (pid * 16 >= proc_tbl_size) | ||
53 | return -EINVAL; | ||
54 | |||
55 | /* Read partition table to find root of tree for effective PID */ | ||
56 | ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, | ||
57 | &prte, sizeof(prte)); | ||
58 | if (ret) | ||
59 | return ret; | ||
60 | |||
61 | root = be64_to_cpu(prte); | ||
62 | rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) | | ||
63 | ((root & RTS2_MASK) >> RTS2_SHIFT); | ||
64 | bits = root & RPDS_MASK; | ||
65 | root = root & RPDB_MASK; | ||
66 | |||
67 | /* P9 DD1 interprets RTS (radix tree size) differently */ | ||
68 | offset = rts + 31; | ||
69 | if (cpu_has_feature(CPU_FTR_POWER9_DD1)) | ||
70 | offset -= 3; | ||
71 | |||
72 | /* current implementations only support 52-bit space */ | ||
73 | if (offset != 52) | ||
74 | return -EINVAL; | ||
75 | |||
76 | for (level = 3; level >= 0; --level) { | ||
77 | if (level && bits != p9_supported_radix_bits[level]) | ||
78 | return -EINVAL; | ||
79 | if (level == 0 && !(bits == 5 || bits == 9)) | ||
80 | return -EINVAL; | ||
81 | offset -= bits; | ||
82 | index = (eaddr >> offset) & ((1UL << bits) - 1); | ||
83 | /* check that low bits of page table base are zero */ | ||
84 | if (root & ((1UL << (bits + 3)) - 1)) | ||
85 | return -EINVAL; | ||
86 | ret = kvm_read_guest(kvm, root + index * 8, | ||
87 | &rpte, sizeof(rpte)); | ||
88 | if (ret) | ||
89 | return ret; | ||
90 | pte = __be64_to_cpu(rpte); | ||
91 | if (!(pte & _PAGE_PRESENT)) | ||
92 | return -ENOENT; | ||
93 | if (pte & _PAGE_PTE) | ||
94 | break; | ||
95 | bits = pte & 0x1f; | ||
96 | root = pte & 0x0fffffffffffff00ul; | ||
97 | } | ||
98 | /* need a leaf at lowest level; 512GB pages not supported */ | ||
99 | if (level < 0 || level == 3) | ||
100 | return -EINVAL; | ||
101 | |||
102 | /* offset is now log base 2 of the page size */ | ||
103 | gpa = pte & 0x01fffffffffff000ul; | ||
104 | if (gpa & ((1ul << offset) - 1)) | ||
105 | return -EINVAL; | ||
106 | gpa += eaddr & ((1ul << offset) - 1); | ||
107 | for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps) | ||
108 | if (offset == mmu_psize_defs[ps].shift) | ||
109 | break; | ||
110 | gpte->page_size = ps; | ||
111 | |||
112 | gpte->eaddr = eaddr; | ||
113 | gpte->raddr = gpa; | ||
114 | |||
115 | /* Work out permissions */ | ||
116 | gpte->may_read = !!(pte & _PAGE_READ); | ||
117 | gpte->may_write = !!(pte & _PAGE_WRITE); | ||
118 | gpte->may_execute = !!(pte & _PAGE_EXEC); | ||
119 | if (kvmppc_get_msr(vcpu) & MSR_PR) { | ||
120 | if (pte & _PAGE_PRIVILEGED) { | ||
121 | gpte->may_read = 0; | ||
122 | gpte->may_write = 0; | ||
123 | gpte->may_execute = 0; | ||
124 | } | ||
125 | } else { | ||
126 | if (!(pte & _PAGE_PRIVILEGED)) { | ||
127 | /* Check AMR/IAMR to see if strict mode is in force */ | ||
128 | if (vcpu->arch.amr & (1ul << 62)) | ||
129 | gpte->may_read = 0; | ||
130 | if (vcpu->arch.amr & (1ul << 63)) | ||
131 | gpte->may_write = 0; | ||
132 | if (vcpu->arch.iamr & (1ul << 62)) | ||
133 | gpte->may_execute = 0; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
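/*
 * A condensed, non-authoritative restatement of the root-descriptor
 * decode performed in kvmppc_mmu_radix_xlate() above, assuming the ISA
 * v3.00 field layout and the RTS1_MASK/RTS2_MASK/RPDB_MASK/RPDS_MASK
 * definitions used in this series; decode_radix_root() is an
 * illustrative helper, not part of the patch.
 */
struct radix_root {
	unsigned long base;	/* root page-directory base (RPDB) */
	unsigned long bits;	/* index bits at the top level (RPDS) */
	unsigned long space;	/* log2 of translated space (RTS + 31) */
};

static void decode_radix_root(unsigned long entry, struct radix_root *r)
{
	unsigned long rts = ((entry & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
			    ((entry & RTS2_MASK) >> RTS2_SHIFT);

	r->space = rts + 31;	/* 52 for the only supported size */
	r->bits  = entry & RPDS_MASK;
	r->base  = entry & RPDB_MASK;
}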
140 | #ifdef CONFIG_PPC_64K_PAGES | ||
141 | #define MMU_BASE_PSIZE MMU_PAGE_64K | ||
142 | #else | ||
143 | #define MMU_BASE_PSIZE MMU_PAGE_4K | ||
144 | #endif | ||
145 | |||
146 | static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, | ||
147 | unsigned int pshift) | ||
148 | { | ||
149 | int psize = MMU_BASE_PSIZE; | ||
150 | |||
151 | if (pshift >= PMD_SHIFT) | ||
152 | psize = MMU_PAGE_2M; | ||
153 | addr &= ~0xfffUL; | ||
154 | addr |= mmu_psize_defs[psize].ap << 5; | ||
155 | asm volatile("ptesync": : :"memory"); | ||
156 | asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) | ||
157 | : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); | ||
158 | asm volatile("ptesync": : :"memory"); | ||
159 | } | ||
160 | |||
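/*
 * For reference, the address operand built above packs the effective
 * address (low 12 bits cleared) with the actual-page-size code in bits
 * 5:7, so a 2MB invalidation would be formed as (assuming the AP values
 * in mmu_psize_defs[]):
 *
 *	rb = (addr & ~0xfffUL) | (mmu_psize_defs[MMU_PAGE_2M].ap << 5);
 */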
161 | unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, | ||
162 | unsigned long clr, unsigned long set, | ||
163 | unsigned long addr, unsigned int shift) | ||
164 | { | ||
165 | unsigned long old = 0; | ||
166 | |||
167 | if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) && | ||
168 | pte_present(*ptep)) { | ||
169 | /* have to invalidate it first */ | ||
170 | old = __radix_pte_update(ptep, _PAGE_PRESENT, 0); | ||
171 | kvmppc_radix_tlbie_page(kvm, addr, shift); | ||
172 | set |= _PAGE_PRESENT; | ||
173 | old &= _PAGE_PRESENT; | ||
174 | } | ||
175 | return __radix_pte_update(ptep, clr, set) | old; | ||
176 | } | ||
177 | |||
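/*
 * Typical use, as in the fault and unmap paths below: atomically clear
 * _PAGE_PRESENT and harvest the old R/C bits, e.g.
 *
 *	old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
 *				      gpa, shift);
 *	if (old & _PAGE_DIRTY)
 *		mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
 *
 * On P9 DD1 the helper transparently inserts the invalidate-first
 * sequence required by that chip revision.
 */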
178 | void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, | ||
179 | pte_t *ptep, pte_t pte) | ||
180 | { | ||
181 | radix__set_pte_at(kvm->mm, addr, ptep, pte, 0); | ||
182 | } | ||
183 | |||
184 | static struct kmem_cache *kvm_pte_cache; | ||
185 | |||
186 | static pte_t *kvmppc_pte_alloc(void) | ||
187 | { | ||
188 | return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); | ||
189 | } | ||
190 | |||
191 | static void kvmppc_pte_free(pte_t *ptep) | ||
192 | { | ||
193 | kmem_cache_free(kvm_pte_cache, ptep); | ||
194 | } | ||
195 | |||
196 | static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, | ||
197 | unsigned int level, unsigned long mmu_seq) | ||
198 | { | ||
199 | pgd_t *pgd; | ||
200 | pud_t *pud, *new_pud = NULL; | ||
201 | pmd_t *pmd, *new_pmd = NULL; | ||
202 | pte_t *ptep, *new_ptep = NULL; | ||
203 | unsigned long old; | ||
204 | int ret; | ||
205 | |||
206 | /* Traverse the guest's 2nd-level tree, allocate new levels needed */ | ||
207 | pgd = kvm->arch.pgtable + pgd_index(gpa); | ||
208 | pud = NULL; | ||
209 | if (pgd_present(*pgd)) | ||
210 | pud = pud_offset(pgd, gpa); | ||
211 | else | ||
212 | new_pud = pud_alloc_one(kvm->mm, gpa); | ||
213 | |||
214 | pmd = NULL; | ||
215 | if (pud && pud_present(*pud)) | ||
216 | pmd = pmd_offset(pud, gpa); | ||
217 | else | ||
218 | new_pmd = pmd_alloc_one(kvm->mm, gpa); | ||
219 | |||
220 | if (level == 0 && !(pmd && pmd_present(*pmd))) | ||
221 | new_ptep = kvmppc_pte_alloc(); | ||
222 | |||
223 | /* Check if we might have been invalidated; let the guest retry if so */ | ||
224 | spin_lock(&kvm->mmu_lock); | ||
225 | ret = -EAGAIN; | ||
226 | if (mmu_notifier_retry(kvm, mmu_seq)) | ||
227 | goto out_unlock; | ||
228 | |||
229 | /* Now traverse again under the lock and change the tree */ | ||
230 | ret = -ENOMEM; | ||
231 | if (pgd_none(*pgd)) { | ||
232 | if (!new_pud) | ||
233 | goto out_unlock; | ||
234 | pgd_populate(kvm->mm, pgd, new_pud); | ||
235 | new_pud = NULL; | ||
236 | } | ||
237 | pud = pud_offset(pgd, gpa); | ||
238 | if (pud_none(*pud)) { | ||
239 | if (!new_pmd) | ||
240 | goto out_unlock; | ||
241 | pud_populate(kvm->mm, pud, new_pmd); | ||
242 | new_pmd = NULL; | ||
243 | } | ||
244 | pmd = pmd_offset(pud, gpa); | ||
245 | if (pmd_large(*pmd)) { | ||
246 | /* Someone else has instantiated a large page here; retry */ | ||
247 | ret = -EAGAIN; | ||
248 | goto out_unlock; | ||
249 | } | ||
250 | if (level == 1 && !pmd_none(*pmd)) { | ||
251 | /* | ||
252 | * There's a page table page here, but we wanted | ||
253 | * to install a large page. Tell the caller and let | ||
254 | * it try installing a normal page if it wants. | ||
255 | */ | ||
256 | ret = -EBUSY; | ||
257 | goto out_unlock; | ||
258 | } | ||
259 | if (level == 0) { | ||
260 | if (pmd_none(*pmd)) { | ||
261 | if (!new_ptep) | ||
262 | goto out_unlock; | ||
263 | pmd_populate(kvm->mm, pmd, new_ptep); | ||
264 | new_ptep = NULL; | ||
265 | } | ||
266 | ptep = pte_offset_kernel(pmd, gpa); | ||
267 | if (pte_present(*ptep)) { | ||
268 | /* PTE was previously valid, so invalidate it */ | ||
269 | old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, | ||
270 | 0, gpa, 0); | ||
271 | kvmppc_radix_tlbie_page(kvm, gpa, 0); | ||
272 | if (old & _PAGE_DIRTY) | ||
273 | mark_page_dirty(kvm, gpa >> PAGE_SHIFT); | ||
274 | } | ||
275 | kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); | ||
276 | } else { | ||
277 | kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); | ||
278 | } | ||
279 | ret = 0; | ||
280 | |||
281 | out_unlock: | ||
282 | spin_unlock(&kvm->mmu_lock); | ||
283 | if (new_pud) | ||
284 | pud_free(kvm->mm, new_pud); | ||
285 | if (new_pmd) | ||
286 | pmd_free(kvm->mm, new_pmd); | ||
287 | if (new_ptep) | ||
288 | kvmppc_pte_free(new_ptep); | ||
289 | return ret; | ||
290 | } | ||
291 | |||
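/*
 * The shape of kvmppc_create_pte() above is deliberate: new levels are
 * allocated speculatively before mmu_lock is taken (the allocators may
 * sleep), the tree is re-walked under the lock, and any level that lost
 * a race is freed on the way out.  Return values assumed by callers:
 * 0 on success, -EAGAIN if an invalidation raced (the guest retries),
 * -EBUSY if a page-table page blocks a large-page install.
 */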
292 | int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
293 | unsigned long ea, unsigned long dsisr) | ||
294 | { | ||
295 | struct kvm *kvm = vcpu->kvm; | ||
296 | unsigned long mmu_seq, pte_size; | ||
297 | unsigned long gpa, gfn, hva, pfn; | ||
298 | struct kvm_memory_slot *memslot; | ||
299 | struct page *page = NULL, *pages[1]; | ||
300 | long ret, npages, ok; | ||
301 | unsigned int writing; | ||
302 | struct vm_area_struct *vma; | ||
303 | unsigned long flags; | ||
304 | pte_t pte, *ptep; | ||
305 | unsigned long pgflags; | ||
306 | unsigned int shift, level; | ||
307 | |||
308 | /* Check for unusual errors */ | ||
309 | if (dsisr & DSISR_UNSUPP_MMU) { | ||
310 | pr_err("KVM: Got unsupported MMU fault\n"); | ||
311 | return -EFAULT; | ||
312 | } | ||
313 | if (dsisr & DSISR_BADACCESS) { | ||
314 | /* Reflect to the guest as DSI */ | ||
315 | pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr); | ||
316 | kvmppc_core_queue_data_storage(vcpu, ea, dsisr); | ||
317 | return RESUME_GUEST; | ||
318 | } | ||
319 | |||
320 | /* Translate the logical address and get the page */ | ||
321 | gpa = vcpu->arch.fault_gpa & ~0xfffUL; | ||
322 | gpa &= ~0xF000000000000000ul; | ||
323 | gfn = gpa >> PAGE_SHIFT; | ||
324 | if (!(dsisr & DSISR_PGDIRFAULT)) | ||
325 | gpa |= ea & 0xfff; | ||
326 | memslot = gfn_to_memslot(kvm, gfn); | ||
327 | |||
328 | /* No memslot means it's an emulated MMIO region */ | ||
329 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { | ||
330 | if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS | | ||
331 | DSISR_SET_RC)) { | ||
332 | /* | ||
333 | * Bad address in guest page table tree, or other | ||
334 | * unusual error - reflect it to the guest as DSI. | ||
335 | */ | ||
336 | kvmppc_core_queue_data_storage(vcpu, ea, dsisr); | ||
337 | return RESUME_GUEST; | ||
338 | } | ||
339 | return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, | ||
340 | dsisr & DSISR_ISSTORE); | ||
341 | } | ||
342 | |||
343 | /* used to check for invalidations in progress */ | ||
344 | mmu_seq = kvm->mmu_notifier_seq; | ||
345 | smp_rmb(); | ||
346 | |||
347 | writing = (dsisr & DSISR_ISSTORE) != 0; | ||
348 | hva = gfn_to_hva_memslot(memslot, gfn); | ||
349 | if (dsisr & DSISR_SET_RC) { | ||
350 | /* | ||
351 | * Need to set an R or C bit in the 2nd-level tables; | ||
352 | * if the relevant bits aren't already set in the linux | ||
353 | * page tables, fall through to do the gup_fast to | ||
354 | * set them in the linux page tables too. | ||
355 | */ | ||
356 | ok = 0; | ||
357 | pgflags = _PAGE_ACCESSED; | ||
358 | if (writing) | ||
359 | pgflags |= _PAGE_DIRTY; | ||
360 | local_irq_save(flags); | ||
361 | ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, | ||
362 | NULL, NULL); | ||
363 | if (ptep) { | ||
364 | pte = READ_ONCE(*ptep); | ||
365 | if (pte_present(pte) && | ||
366 | (pte_val(pte) & pgflags) == pgflags) | ||
367 | ok = 1; | ||
368 | } | ||
369 | local_irq_restore(flags); | ||
370 | if (ok) { | ||
371 | spin_lock(&kvm->mmu_lock); | ||
372 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { | ||
373 | spin_unlock(&kvm->mmu_lock); | ||
374 | return RESUME_GUEST; | ||
375 | } | ||
376 | ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, | ||
377 | gpa, NULL, &shift); | ||
378 | if (ptep && pte_present(*ptep)) { | ||
379 | kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, | ||
380 | gpa, shift); | ||
381 | spin_unlock(&kvm->mmu_lock); | ||
382 | return RESUME_GUEST; | ||
383 | } | ||
384 | spin_unlock(&kvm->mmu_lock); | ||
385 | } | ||
386 | } | ||
387 | |||
388 | ret = -EFAULT; | ||
389 | pfn = 0; | ||
390 | pte_size = PAGE_SIZE; | ||
391 | pgflags = _PAGE_READ | _PAGE_EXEC; | ||
392 | level = 0; | ||
393 | npages = get_user_pages_fast(hva, 1, writing, pages); | ||
394 | if (npages < 1) { | ||
395 | /* Check if it's an I/O mapping */ | ||
396 | down_read(&current->mm->mmap_sem); | ||
397 | vma = find_vma(current->mm, hva); | ||
398 | if (vma && vma->vm_start <= hva && hva < vma->vm_end && | ||
399 | (vma->vm_flags & VM_PFNMAP)) { | ||
400 | pfn = vma->vm_pgoff + | ||
401 | ((hva - vma->vm_start) >> PAGE_SHIFT); | ||
402 | pgflags = pgprot_val(vma->vm_page_prot); | ||
403 | } | ||
404 | up_read(&current->mm->mmap_sem); | ||
405 | if (!pfn) | ||
406 | return -EFAULT; | ||
407 | } else { | ||
408 | page = pages[0]; | ||
409 | pfn = page_to_pfn(page); | ||
410 | if (PageHuge(page)) { | ||
411 | page = compound_head(page); | ||
412 | pte_size <<= compound_order(page); | ||
413 | /* See if we can insert a 2MB large-page PTE here */ | ||
414 | if (pte_size >= PMD_SIZE && | ||
415 | (gpa & PMD_MASK & PAGE_MASK) == | ||
416 | (hva & PMD_MASK & PAGE_MASK)) { | ||
417 | level = 1; | ||
418 | pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); | ||
419 | } | ||
420 | } | ||
421 | /* See if we can provide write access */ | ||
422 | if (writing) { | ||
423 | /* | ||
424 | * We assume gup_fast has set dirty on the host PTE. | ||
425 | */ | ||
426 | pgflags |= _PAGE_WRITE; | ||
427 | } else { | ||
428 | local_irq_save(flags); | ||
429 | ptep = __find_linux_pte_or_hugepte(current->mm->pgd, | ||
430 | hva, NULL, NULL); | ||
431 | if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) | ||
432 | pgflags |= _PAGE_WRITE; | ||
433 | local_irq_restore(flags); | ||
434 | } | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Compute the PTE value that we need to insert. | ||
439 | */ | ||
440 | pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED; | ||
441 | if (pgflags & _PAGE_WRITE) | ||
442 | pgflags |= _PAGE_DIRTY; | ||
443 | pte = pfn_pte(pfn, __pgprot(pgflags)); | ||
444 | |||
445 | /* Allocate space in the tree and write the PTE */ | ||
446 | ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); | ||
447 | if (ret == -EBUSY) { | ||
448 | /* | ||
449 | * There's already a PMD where we wanted to install a large page; | ||
450 | * for now, fall back to installing a small page. | ||
451 | */ | ||
452 | level = 0; | ||
453 | pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1); | ||
454 | pte = pfn_pte(pfn, __pgprot(pgflags)); | ||
455 | ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); | ||
456 | } | ||
457 | if (ret == 0 || ret == -EAGAIN) | ||
458 | ret = RESUME_GUEST; | ||
459 | |||
460 | if (page) { | ||
461 | /* | ||
462 | * We drop pages[0] here, not page, because page might | ||
463 | * have been set to the head page of a compound, and | ||
464 | * we have to drop the reference on the correct tail | ||
465 | * page to match the get inside gup(). | ||
466 | */ | ||
467 | put_page(pages[0]); | ||
468 | } | ||
469 | return ret; | ||
470 | } | ||
471 | |||
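/*
 * Fault-path summary under the assumptions above: DSISR_SET_RC faults
 * are satisfied by setting R/C in both trees where possible; otherwise
 * gup_fast (or a VM_PFNMAP walk for I/O mappings) yields a pfn, a 2MB
 * PTE is attempted when a huge host page lines up with the guest real
 * address, and an -EBUSY from kvmppc_create_pte() falls back to a 4K
 * PTE at the same address.
 */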
472 | static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
473 | unsigned long gfn, unsigned int order) | ||
474 | { | ||
475 | unsigned long i, limit; | ||
476 | unsigned long *dp; | ||
477 | |||
478 | if (!memslot->dirty_bitmap) | ||
479 | return; | ||
480 | limit = 1ul << order; | ||
481 | if (limit < BITS_PER_LONG) { | ||
482 | for (i = 0; i < limit; ++i) | ||
483 | mark_page_dirty(kvm, gfn + i); | ||
484 | return; | ||
485 | } | ||
486 | dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn); | ||
487 | limit /= BITS_PER_LONG; | ||
488 | for (i = 0; i < limit; ++i) | ||
489 | *dp++ = ~0ul; | ||
490 | } | ||
491 | |||
492 | /* Called with kvm->lock held */ | ||
493 | int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
494 | unsigned long gfn) | ||
495 | { | ||
496 | pte_t *ptep; | ||
497 | unsigned long gpa = gfn << PAGE_SHIFT; | ||
498 | unsigned int shift; | ||
499 | unsigned long old; | ||
500 | |||
501 | ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, | ||
502 | NULL, &shift); | ||
503 | if (ptep && pte_present(*ptep)) { | ||
504 | old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, | ||
505 | gpa, shift); | ||
506 | kvmppc_radix_tlbie_page(kvm, gpa, shift); | ||
507 | if (old & _PAGE_DIRTY) { | ||
508 | if (!shift) | ||
509 | mark_page_dirty(kvm, gfn); | ||
510 | else | ||
511 | mark_pages_dirty(kvm, memslot, | ||
512 | gfn, shift - PAGE_SHIFT); | ||
513 | } | ||
514 | } | ||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | /* Called with kvm->lock held */ | ||
519 | int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
520 | unsigned long gfn) | ||
521 | { | ||
522 | pte_t *ptep; | ||
523 | unsigned long gpa = gfn << PAGE_SHIFT; | ||
524 | unsigned int shift; | ||
525 | int ref = 0; | ||
526 | |||
527 | ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, | ||
528 | NULL, &shift); | ||
529 | if (ptep && pte_present(*ptep) && pte_young(*ptep)) { | ||
530 | kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, | ||
531 | gpa, shift); | ||
532 | /* XXX need to flush tlb here? */ | ||
533 | ref = 1; | ||
534 | } | ||
535 | return ref; | ||
536 | } | ||
537 | |||
538 | /* Called with kvm->lock held */ | ||
539 | int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
540 | unsigned long gfn) | ||
541 | { | ||
542 | pte_t *ptep; | ||
543 | unsigned long gpa = gfn << PAGE_SHIFT; | ||
544 | unsigned int shift; | ||
545 | int ref = 0; | ||
546 | |||
547 | ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, | ||
548 | NULL, &shift); | ||
549 | if (ptep && pte_present(*ptep) && pte_young(*ptep)) | ||
550 | ref = 1; | ||
551 | return ref; | ||
552 | } | ||
553 | |||
554 | /* Returns the number of PAGE_SIZE pages that are dirty */ | ||
555 | static int kvm_radix_test_clear_dirty(struct kvm *kvm, | ||
556 | struct kvm_memory_slot *memslot, int pagenum) | ||
557 | { | ||
558 | unsigned long gfn = memslot->base_gfn + pagenum; | ||
559 | unsigned long gpa = gfn << PAGE_SHIFT; | ||
560 | pte_t *ptep; | ||
561 | unsigned int shift; | ||
562 | int ret = 0; | ||
563 | |||
564 | ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, | ||
565 | NULL, &shift); | ||
566 | if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { | ||
567 | ret = 1; | ||
568 | if (shift) | ||
569 | ret = 1 << (shift - PAGE_SHIFT); | ||
570 | kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, | ||
571 | gpa, shift); | ||
572 | kvmppc_radix_tlbie_page(kvm, gpa, shift); | ||
573 | } | ||
574 | return ret; | ||
575 | } | ||
576 | |||
577 | long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, | ||
578 | struct kvm_memory_slot *memslot, unsigned long *map) | ||
579 | { | ||
580 | unsigned long i, j; | ||
581 | unsigned long n, *p; | ||
582 | int npages; | ||
583 | |||
584 | /* | ||
585 | * Radix accumulates dirty bits in the first half of the | ||
586 | * memslot's dirty_bitmap area, for when pages are paged | ||
587 | * out or modified by the host directly. Pick up these | ||
588 | * bits and add them to the map. | ||
589 | */ | ||
590 | n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long); | ||
591 | p = memslot->dirty_bitmap; | ||
592 | for (i = 0; i < n; ++i) | ||
593 | map[i] |= xchg(&p[i], 0); | ||
594 | |||
595 | for (i = 0; i < memslot->npages; i = j) { | ||
596 | npages = kvm_radix_test_clear_dirty(kvm, memslot, i); | ||
597 | |||
598 | /* | ||
599 | * Note that if npages > 0 then i must be a multiple of npages, | ||
600 | * since huge pages are only used to back the guest at guest | ||
601 | * real addresses that are a multiple of their size. | ||
602 | * Since we have at most one PTE covering any given guest | ||
603 | * real address, if npages > 1 we can skip to i + npages. | ||
604 | */ | ||
605 | j = i + 1; | ||
606 | if (npages) | ||
607 | for (j = i; npages; ++j, --npages) | ||
608 | __set_bit_le(j, map); | ||
609 | } | ||
610 | return 0; | ||
611 | } | ||
612 | |||
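/*
 * This pairs with kvm_vm_ioctl_get_dirty_log_hv(): because radix
 * accumulates bits in the first half of dirty_bitmap, the ioctl hands
 * this function a pointer into the second half, roughly:
 *
 *	buf = memslot->dirty_bitmap + n / sizeof(long);
 *	kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
 *
 * and the xchg() above transfers-and-clears the accumulated bits.
 */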
613 | static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, | ||
614 | int psize, int *indexp) | ||
615 | { | ||
616 | if (!mmu_psize_defs[psize].shift) | ||
617 | return; | ||
618 | info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift | | ||
619 | (mmu_psize_defs[psize].ap << 29); | ||
620 | ++(*indexp); | ||
621 | } | ||
622 | |||
623 | int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info) | ||
624 | { | ||
625 | int i; | ||
626 | |||
627 | if (!radix_enabled()) | ||
628 | return -EINVAL; | ||
629 | memset(info, 0, sizeof(*info)); | ||
630 | |||
631 | /* 4k page size */ | ||
632 | info->geometries[0].page_shift = 12; | ||
633 | info->geometries[0].level_bits[0] = 9; | ||
634 | for (i = 1; i < 4; ++i) | ||
635 | info->geometries[0].level_bits[i] = p9_supported_radix_bits[i]; | ||
636 | /* 64k page size */ | ||
637 | info->geometries[1].page_shift = 16; | ||
638 | for (i = 0; i < 4; ++i) | ||
639 | info->geometries[1].level_bits[i] = p9_supported_radix_bits[i]; | ||
640 | |||
641 | i = 0; | ||
642 | add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i); | ||
643 | add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i); | ||
644 | add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i); | ||
645 | add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i); | ||
646 | |||
647 | return 0; | ||
648 | } | ||
649 | |||
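/*
 * Illustrative output under the geometry table above: a 64k-page host
 * reports geometries[1] = { .page_shift = 16, .level_bits = {5,9,9,13} },
 * and each AP encoding packs the page shift in the low bits with the AP
 * code in bits 29:31, e.g. for 2MB pages:
 *
 *	info->ap_encodings[i] = 21 | (mmu_psize_defs[MMU_PAGE_2M].ap << 29);
 */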
650 | int kvmppc_init_vm_radix(struct kvm *kvm) | ||
651 | { | ||
652 | kvm->arch.pgtable = pgd_alloc(kvm->mm); | ||
653 | if (!kvm->arch.pgtable) | ||
654 | return -ENOMEM; | ||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | void kvmppc_free_radix(struct kvm *kvm) | ||
659 | { | ||
660 | unsigned long ig, iu, im; | ||
661 | pte_t *pte; | ||
662 | pmd_t *pmd; | ||
663 | pud_t *pud; | ||
664 | pgd_t *pgd; | ||
665 | |||
666 | if (!kvm->arch.pgtable) | ||
667 | return; | ||
668 | pgd = kvm->arch.pgtable; | ||
669 | for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { | ||
670 | if (!pgd_present(*pgd)) | ||
671 | continue; | ||
672 | pud = pud_offset(pgd, 0); | ||
673 | for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { | ||
674 | if (!pud_present(*pud)) | ||
675 | continue; | ||
676 | pmd = pmd_offset(pud, 0); | ||
677 | for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { | ||
678 | if (pmd_huge(*pmd)) { | ||
679 | pmd_clear(pmd); | ||
680 | continue; | ||
681 | } | ||
682 | if (!pmd_present(*pmd)) | ||
683 | continue; | ||
684 | pte = pte_offset_map(pmd, 0); | ||
685 | memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); | ||
686 | kvmppc_pte_free(pte); | ||
687 | pmd_clear(pmd); | ||
688 | } | ||
689 | pmd_free(kvm->mm, pmd_offset(pud, 0)); | ||
690 | pud_clear(pud); | ||
691 | } | ||
692 | pud_free(kvm->mm, pud_offset(pgd, 0)); | ||
693 | pgd_clear(pgd); | ||
694 | } | ||
695 | pgd_free(kvm->mm, kvm->arch.pgtable); | ||
696 | } | ||
697 | |||
698 | static void pte_ctor(void *addr) | ||
699 | { | ||
700 | memset(addr, 0, PTE_TABLE_SIZE); | ||
701 | } | ||
702 | |||
703 | int kvmppc_radix_init(void) | ||
704 | { | ||
705 | unsigned long size = sizeof(void *) << PTE_INDEX_SIZE; | ||
706 | |||
707 | kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor); | ||
708 | if (!kvm_pte_cache) | ||
709 | return -ENOMEM; | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | void kvmppc_radix_exit(void) | ||
714 | { | ||
715 | kmem_cache_destroy(kvm_pte_cache); | ||
716 | } | ||
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 856cc9d38efd..bdf281cc88c0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -1132,7 +1132,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, | |||
1132 | /* | 1132 | /* |
1133 | * Userspace can only modify DPFD (default prefetch depth), | 1133 | * Userspace can only modify DPFD (default prefetch depth), |
1134 | * ILE (interrupt little-endian) and TC (translation control). | 1134 | * ILE (interrupt little-endian) and TC (translation control). |
1135 | * On POWER8 userspace can also modify AIL (alt. interrupt loc.) | 1135 | * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.). |
1136 | */ | 1136 | */ |
1137 | mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; | 1137 | mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; |
1138 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) | 1138 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) |
@@ -1818,6 +1818,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, | |||
1818 | vcpu->arch.vcore = vcore; | 1818 | vcpu->arch.vcore = vcore; |
1819 | vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; | 1819 | vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; |
1820 | vcpu->arch.thread_cpu = -1; | 1820 | vcpu->arch.thread_cpu = -1; |
1821 | vcpu->arch.prev_cpu = -1; | ||
1821 | 1822 | ||
1822 | vcpu->arch.cpu_type = KVM_CPU_3S_64; | 1823 | vcpu->arch.cpu_type = KVM_CPU_3S_64; |
1823 | kvmppc_sanity_check(vcpu); | 1824 | kvmppc_sanity_check(vcpu); |
@@ -1947,11 +1948,33 @@ static void kvmppc_release_hwthread(int cpu) | |||
1947 | tpaca->kvm_hstate.kvm_split_mode = NULL; | 1948 | tpaca->kvm_hstate.kvm_split_mode = NULL; |
1948 | } | 1949 | } |
1949 | 1950 | ||
1951 | static void do_nothing(void *x) | ||
1952 | { | ||
1953 | } | ||
1954 | |||
1955 | static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) | ||
1956 | { | ||
1957 | int i; | ||
1958 | |||
1959 | cpu = cpu_first_thread_sibling(cpu); | ||
1960 | cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); | ||
1961 | /* | ||
1962 | * Make sure setting of bit in need_tlb_flush precedes | ||
1963 | * testing of cpu_in_guest bits. The matching barrier on | ||
1964 | * the other side is the first smp_mb() in kvmppc_run_core(). | ||
1965 | */ | ||
1966 | smp_mb(); | ||
1967 | for (i = 0; i < threads_per_core; ++i) | ||
1968 | if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest)) | ||
1969 | smp_call_function_single(cpu + i, do_nothing, NULL, 1); | ||
1970 | } | ||
1971 | |||
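/*
 * Ordering sketch assumed here: this side sets the need_tlb_flush bit,
 * executes smp_mb(), then reads cpu_in_guest; kvmppc_run_core() sets
 * cpu_in_guest, executes its first smp_mb(), then the entry path tests
 * need_tlb_flush.  Either the empty-function IPI interrupts a thread
 * already in the guest, or that thread observes the flush bit before
 * entering it.
 */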
1950 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) | 1972 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) |
1951 | { | 1973 | { |
1952 | int cpu; | 1974 | int cpu; |
1953 | struct paca_struct *tpaca; | 1975 | struct paca_struct *tpaca; |
1954 | struct kvmppc_vcore *mvc = vc->master_vcore; | 1976 | struct kvmppc_vcore *mvc = vc->master_vcore; |
1977 | struct kvm *kvm = vc->kvm; | ||
1955 | 1978 | ||
1956 | cpu = vc->pcpu; | 1979 | cpu = vc->pcpu; |
1957 | if (vcpu) { | 1980 | if (vcpu) { |
@@ -1962,6 +1985,27 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) | |||
1962 | cpu += vcpu->arch.ptid; | 1985 | cpu += vcpu->arch.ptid; |
1963 | vcpu->cpu = mvc->pcpu; | 1986 | vcpu->cpu = mvc->pcpu; |
1964 | vcpu->arch.thread_cpu = cpu; | 1987 | vcpu->arch.thread_cpu = cpu; |
1988 | |||
1989 | /* | ||
1990 | * With radix, the guest can do TLB invalidations itself, | ||
1991 | * and it could choose to use the local form (tlbiel) if | ||
1992 | * it is invalidating a translation that has only ever been | ||
1993 | * used on one vcpu. However, that doesn't mean it has | ||
1994 | * only ever been used on one physical cpu, since vcpus | ||
1995 | * can move around between pcpus. To cope with this, when | ||
1996 | * a vcpu moves from one pcpu to another, we need to tell | ||
1997 | * any vcpus running on the same core as this vcpu previously | ||
1998 | * ran to flush the TLB. The TLB is shared between threads, | ||
1999 | * so we use a single bit in .need_tlb_flush for all 4 threads. | ||
2000 | */ | ||
2001 | if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) { | ||
2002 | if (vcpu->arch.prev_cpu >= 0 && | ||
2003 | cpu_first_thread_sibling(vcpu->arch.prev_cpu) != | ||
2004 | cpu_first_thread_sibling(cpu)) | ||
2005 | radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); | ||
2006 | vcpu->arch.prev_cpu = cpu; | ||
2007 | } | ||
2008 | cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); | ||
1965 | } | 2009 | } |
1966 | tpaca = &paca[cpu]; | 2010 | tpaca = &paca[cpu]; |
1967 | tpaca->kvm_hstate.kvm_vcpu = vcpu; | 2011 | tpaca->kvm_hstate.kvm_vcpu = vcpu; |
@@ -2549,6 +2593,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2549 | kvmppc_release_hwthread(pcpu + i); | 2593 | kvmppc_release_hwthread(pcpu + i); |
2550 | if (sip && sip->napped[i]) | 2594 | if (sip && sip->napped[i]) |
2551 | kvmppc_ipi_thread(pcpu + i); | 2595 | kvmppc_ipi_thread(pcpu + i); |
2596 | cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); | ||
2552 | } | 2597 | } |
2553 | 2598 | ||
2554 | kvmppc_set_host_core(pcpu); | 2599 | kvmppc_set_host_core(pcpu); |
@@ -2875,7 +2920,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
2875 | smp_mb(); | 2920 | smp_mb(); |
2876 | 2921 | ||
2877 | /* On the first time here, set up HTAB and VRMA */ | 2922 | /* On the first time here, set up HTAB and VRMA */ |
2878 | if (!vcpu->kvm->arch.hpte_setup_done) { | 2923 | if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) { |
2879 | r = kvmppc_hv_setup_htab_rma(vcpu); | 2924 | r = kvmppc_hv_setup_htab_rma(vcpu); |
2880 | if (r) | 2925 | if (r) |
2881 | goto out; | 2926 | goto out; |
@@ -2937,6 +2982,13 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, | |||
2937 | { | 2982 | { |
2938 | struct kvm_ppc_one_seg_page_size *sps; | 2983 | struct kvm_ppc_one_seg_page_size *sps; |
2939 | 2984 | ||
2985 | /* | ||
2986 | * Since we don't yet support HPT guests on a radix host, | ||
2987 | * return an error if the host uses radix. | ||
2988 | */ | ||
2989 | if (radix_enabled()) | ||
2990 | return -EINVAL; | ||
2991 | |||
2940 | info->flags = KVM_PPC_PAGE_SIZES_REAL; | 2992 | info->flags = KVM_PPC_PAGE_SIZES_REAL; |
2941 | if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) | 2993 | if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) |
2942 | info->flags |= KVM_PPC_1T_SEGMENTS; | 2994 | info->flags |= KVM_PPC_1T_SEGMENTS; |
@@ -2959,8 +3011,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, | |||
2959 | { | 3011 | { |
2960 | struct kvm_memslots *slots; | 3012 | struct kvm_memslots *slots; |
2961 | struct kvm_memory_slot *memslot; | 3013 | struct kvm_memory_slot *memslot; |
2962 | int r; | 3014 | int i, r; |
2963 | unsigned long n; | 3015 | unsigned long n; |
3016 | unsigned long *buf; | ||
3017 | struct kvm_vcpu *vcpu; | ||
2964 | 3018 | ||
2965 | mutex_lock(&kvm->slots_lock); | 3019 | mutex_lock(&kvm->slots_lock); |
2966 | 3020 | ||
@@ -2974,15 +3028,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, | |||
2974 | if (!memslot->dirty_bitmap) | 3028 | if (!memslot->dirty_bitmap) |
2975 | goto out; | 3029 | goto out; |
2976 | 3030 | ||
3031 | /* | ||
3032 | * Use second half of bitmap area because radix accumulates | ||
3033 | * bits in the first half. | ||
3034 | */ | ||
2977 | n = kvm_dirty_bitmap_bytes(memslot); | 3035 | n = kvm_dirty_bitmap_bytes(memslot); |
2978 | memset(memslot->dirty_bitmap, 0, n); | 3036 | buf = memslot->dirty_bitmap + n / sizeof(long); |
3037 | memset(buf, 0, n); | ||
2979 | 3038 | ||
2980 | r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); | 3039 | if (kvm_is_radix(kvm)) |
3040 | r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf); | ||
3041 | else | ||
3042 | r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf); | ||
2981 | if (r) | 3043 | if (r) |
2982 | goto out; | 3044 | goto out; |
2983 | 3045 | ||
3046 | /* Harvest dirty bits from VPA and DTL updates */ | ||
3047 | /* Note: we never modify the SLB shadow buffer areas */ | ||
3048 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
3049 | spin_lock(&vcpu->arch.vpa_update_lock); | ||
3050 | kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf); | ||
3051 | kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf); | ||
3052 | spin_unlock(&vcpu->arch.vpa_update_lock); | ||
3053 | } | ||
3054 | |||
2984 | r = -EFAULT; | 3055 | r = -EFAULT; |
2985 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | 3056 | if (copy_to_user(log->dirty_bitmap, buf, n)) |
2986 | goto out; | 3057 | goto out; |
2987 | 3058 | ||
2988 | r = 0; | 3059 | r = 0; |
@@ -3003,6 +3074,15 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, | |||
3003 | static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, | 3074 | static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, |
3004 | unsigned long npages) | 3075 | unsigned long npages) |
3005 | { | 3076 | { |
3077 | /* | ||
3078 | * For now, if radix_enabled() then we only support radix guests, | ||
3079 | * and in that case we don't need the rmap array. | ||
3080 | */ | ||
3081 | if (radix_enabled()) { | ||
3082 | slot->arch.rmap = NULL; | ||
3083 | return 0; | ||
3084 | } | ||
3085 | |||
3006 | slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); | 3086 | slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); |
3007 | if (!slot->arch.rmap) | 3087 | if (!slot->arch.rmap) |
3008 | return -ENOMEM; | 3088 | return -ENOMEM; |
@@ -3035,7 +3115,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, | |||
3035 | if (npages) | 3115 | if (npages) |
3036 | atomic64_inc(&kvm->arch.mmio_update); | 3116 | atomic64_inc(&kvm->arch.mmio_update); |
3037 | 3117 | ||
3038 | if (npages && old->npages) { | 3118 | if (npages && old->npages && !kvm_is_radix(kvm)) { |
3039 | /* | 3119 | /* |
3040 | * If modifying a memslot, reset all the rmap dirty bits. | 3120 | * If modifying a memslot, reset all the rmap dirty bits. |
3041 | * If this is a new memslot, we don't need to do anything | 3121 | * If this is a new memslot, we don't need to do anything |
@@ -3044,7 +3124,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, | |||
3044 | */ | 3124 | */ |
3045 | slots = kvm_memslots(kvm); | 3125 | slots = kvm_memslots(kvm); |
3046 | memslot = id_to_memslot(slots, mem->slot); | 3126 | memslot = id_to_memslot(slots, mem->slot); |
3047 | kvmppc_hv_get_dirty_log(kvm, memslot, NULL); | 3127 | kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL); |
3048 | } | 3128 | } |
3049 | } | 3129 | } |
3050 | 3130 | ||
@@ -3083,14 +3163,20 @@ static void kvmppc_setup_partition_table(struct kvm *kvm) | |||
3083 | { | 3163 | { |
3084 | unsigned long dw0, dw1; | 3164 | unsigned long dw0, dw1; |
3085 | 3165 | ||
3086 | /* PS field - page size for VRMA */ | 3166 | if (!kvm_is_radix(kvm)) { |
3087 | dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | | 3167 | /* PS field - page size for VRMA */ |
3088 | ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); | 3168 | dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | |
3089 | /* HTABSIZE and HTABORG fields */ | 3169 | ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); |
3090 | dw0 |= kvm->arch.sdr1; | 3170 | /* HTABSIZE and HTABORG fields */ |
3171 | dw0 |= kvm->arch.sdr1; | ||
3091 | 3172 | ||
3092 | /* Second dword has GR=0; other fields are unused since UPRT=0 */ | 3173 | /* Second dword as set by userspace */ |
3093 | dw1 = 0; | 3174 | dw1 = kvm->arch.process_table; |
3175 | } else { | ||
3176 | dw0 = PATB_HR | radix__get_tree_size() | | ||
3177 | __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE; | ||
3178 | dw1 = PATB_GR | kvm->arch.process_table; | ||
3179 | } | ||
3094 | 3180 | ||
3095 | mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); | 3181 | mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); |
3096 | } | 3182 | } |
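/*
 * For a radix guest the two doublewords written above follow the ISA
 * v3.00 partition-table layout assumed throughout this series:
 * dw0 = HR | RTS | guest PGD real address | RPDS, and dw1 = GR | the
 * process-table base and size supplied by userspace through
 * KVM_PPC_CONFIGURE_V3_MMU.
 */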
@@ -3260,6 +3346,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
3260 | { | 3346 | { |
3261 | unsigned long lpcr, lpid; | 3347 | unsigned long lpcr, lpid; |
3262 | char buf[32]; | 3348 | char buf[32]; |
3349 | int ret; | ||
3263 | 3350 | ||
3264 | /* Allocate the guest's logical partition ID */ | 3351 | /* Allocate the guest's logical partition ID */ |
3265 | 3352 | ||
@@ -3307,13 +3394,30 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
3307 | lpcr |= LPCR_HVICE; | 3394 | lpcr |= LPCR_HVICE; |
3308 | } | 3395 | } |
3309 | 3396 | ||
3397 | /* | ||
3398 | * For now, if the host uses radix, the guest must be radix. | ||
3399 | */ | ||
3400 | if (radix_enabled()) { | ||
3401 | kvm->arch.radix = 1; | ||
3402 | lpcr &= ~LPCR_VPM1; | ||
3403 | lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; | ||
3404 | ret = kvmppc_init_vm_radix(kvm); | ||
3405 | if (ret) { | ||
3406 | kvmppc_free_lpid(kvm->arch.lpid); | ||
3407 | return ret; | ||
3408 | } | ||
3409 | kvmppc_setup_partition_table(kvm); | ||
3410 | } | ||
3411 | |||
3310 | kvm->arch.lpcr = lpcr; | 3412 | kvm->arch.lpcr = lpcr; |
3311 | 3413 | ||
3312 | /* | 3414 | /* |
3313 | * Work out how many sets the TLB has, for the use of | 3415 | * Work out how many sets the TLB has, for the use of |
3314 | * the TLB invalidation loop in book3s_hv_rmhandlers.S. | 3416 | * the TLB invalidation loop in book3s_hv_rmhandlers.S. |
3315 | */ | 3417 | */ |
3316 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | 3418 | if (kvm_is_radix(kvm)) |
3419 | kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ | ||
3420 | else if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
3317 | kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ | 3421 | kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ |
3318 | else if (cpu_has_feature(CPU_FTR_ARCH_207S)) | 3422 | else if (cpu_has_feature(CPU_FTR_ARCH_207S)) |
3319 | kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ | 3423 | kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ |
@@ -3323,8 +3427,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
3323 | /* | 3427 | /* |
3324 | * Track that we now have a HV mode VM active. This blocks secondary | 3428 | * Track that we now have a HV mode VM active. This blocks secondary |
3325 | * CPU threads from coming online. | 3429 | * CPU threads from coming online. |
3430 | * On POWER9, we only need to do this for HPT guests on a radix | ||
3431 | * host, which is not yet supported. | ||
3326 | */ | 3432 | */ |
3327 | kvm_hv_vm_activated(); | 3433 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) |
3434 | kvm_hv_vm_activated(); | ||
3328 | 3435 | ||
3329 | /* | 3436 | /* |
3330 | * Create a debugfs directory for the VM | 3437 | * Create a debugfs directory for the VM |
@@ -3350,11 +3457,17 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) | |||
3350 | { | 3457 | { |
3351 | debugfs_remove_recursive(kvm->arch.debugfs_dir); | 3458 | debugfs_remove_recursive(kvm->arch.debugfs_dir); |
3352 | 3459 | ||
3353 | kvm_hv_vm_deactivated(); | 3460 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) |
3461 | kvm_hv_vm_deactivated(); | ||
3354 | 3462 | ||
3355 | kvmppc_free_vcores(kvm); | 3463 | kvmppc_free_vcores(kvm); |
3356 | 3464 | ||
3357 | kvmppc_free_hpt(kvm); | 3465 | kvmppc_free_lpid(kvm->arch.lpid); |
3466 | |||
3467 | if (kvm_is_radix(kvm)) | ||
3468 | kvmppc_free_radix(kvm); | ||
3469 | else | ||
3470 | kvmppc_free_hpt(kvm); | ||
3358 | 3471 | ||
3359 | kvmppc_free_pimap(kvm); | 3472 | kvmppc_free_pimap(kvm); |
3360 | } | 3473 | } |
@@ -3383,11 +3496,6 @@ static int kvmppc_core_check_processor_compat_hv(void) | |||
3383 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | 3496 | if (!cpu_has_feature(CPU_FTR_HVMODE) || |
3384 | !cpu_has_feature(CPU_FTR_ARCH_206)) | 3497 | !cpu_has_feature(CPU_FTR_ARCH_206)) |
3385 | return -EIO; | 3498 | return -EIO; |
3386 | /* | ||
3387 | * Disable KVM for Power9 in radix mode. | ||
3388 | */ | ||
3389 | if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) | ||
3390 | return -EIO; | ||
3391 | 3499 | ||
3392 | return 0; | 3500 | return 0; |
3393 | } | 3501 | } |
@@ -3655,6 +3763,41 @@ static void init_default_hcalls(void) | |||
3655 | } | 3763 | } |
3656 | } | 3764 | } |
3657 | 3765 | ||
3766 | static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) | ||
3767 | { | ||
3768 | unsigned long lpcr; | ||
3769 | int radix; | ||
3770 | |||
3771 | /* If not on a POWER9, reject it */ | ||
3772 | if (!cpu_has_feature(CPU_FTR_ARCH_300)) | ||
3773 | return -ENODEV; | ||
3774 | |||
3775 | /* If any unknown flags set, reject it */ | ||
3776 | if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE)) | ||
3777 | return -EINVAL; | ||
3778 | |||
3779 | /* We can't change a guest to/from radix yet */ | ||
3780 | radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX); | ||
3781 | if (radix != kvm_is_radix(kvm)) | ||
3782 | return -EINVAL; | ||
3783 | |||
3784 | /* GR (guest radix) bit in process_table field must match */ | ||
3785 | if (!!(cfg->process_table & PATB_GR) != radix) | ||
3786 | return -EINVAL; | ||
3787 | |||
3788 | /* Process table size field must be reasonable, i.e. <= 24 */ | ||
3789 | if ((cfg->process_table & PRTS_MASK) > 24) | ||
3790 | return -EINVAL; | ||
3791 | |||
3792 | kvm->arch.process_table = cfg->process_table; | ||
3793 | kvmppc_setup_partition_table(kvm); | ||
3794 | |||
3795 | lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0; | ||
3796 | kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE); | ||
3797 | |||
3798 | return 0; | ||
3799 | } | ||
3800 | |||
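/*
 * A hypothetical userspace sketch of driving this path; prtb_pa and
 * prtb_size_shift are assumed names for the guest process-table base
 * and log2 size, and the size field must be at most 24 per the check
 * above:
 *
 *	struct kvm_ppc_mmuv3_cfg cfg = {
 *		.flags = KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE,
 *		.process_table = PATB_GR | prtb_pa |
 *				 (prtb_size_shift - 12),
 *	};
 *	if (ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg) < 0)
 *		err(1, "KVM_PPC_CONFIGURE_V3_MMU");
 */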
3658 | static struct kvmppc_ops kvm_ops_hv = { | 3801 | static struct kvmppc_ops kvm_ops_hv = { |
3659 | .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, | 3802 | .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, |
3660 | .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, | 3803 | .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, |
@@ -3692,6 +3835,8 @@ static struct kvmppc_ops kvm_ops_hv = { | |||
3692 | .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, | 3835 | .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, |
3693 | .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, | 3836 | .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, |
3694 | #endif | 3837 | #endif |
3838 | .configure_mmu = kvmhv_configure_mmu, | ||
3839 | .get_rmmu_info = kvmhv_get_rmmu_info, | ||
3695 | }; | 3840 | }; |
3696 | 3841 | ||
3697 | static int kvm_init_subcore_bitmap(void) | 3842 | static int kvm_init_subcore_bitmap(void) |
@@ -3726,6 +3871,11 @@ static int kvm_init_subcore_bitmap(void) | |||
3726 | return 0; | 3871 | return 0; |
3727 | } | 3872 | } |
3728 | 3873 | ||
3874 | static int kvmppc_radix_possible(void) | ||
3875 | { | ||
3876 | return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled(); | ||
3877 | } | ||
3878 | |||
3729 | static int kvmppc_book3s_init_hv(void) | 3879 | static int kvmppc_book3s_init_hv(void) |
3730 | { | 3880 | { |
3731 | int r; | 3881 | int r; |
@@ -3765,12 +3915,19 @@ static int kvmppc_book3s_init_hv(void) | |||
3765 | init_vcore_lists(); | 3915 | init_vcore_lists(); |
3766 | 3916 | ||
3767 | r = kvmppc_mmu_hv_init(); | 3917 | r = kvmppc_mmu_hv_init(); |
3918 | if (r) | ||
3919 | return r; | ||
3920 | |||
3921 | if (kvmppc_radix_possible()) | ||
3922 | r = kvmppc_radix_init(); | ||
3768 | return r; | 3923 | return r; |
3769 | } | 3924 | } |
3770 | 3925 | ||
3771 | static void kvmppc_book3s_exit_hv(void) | 3926 | static void kvmppc_book3s_exit_hv(void) |
3772 | { | 3927 | { |
3773 | kvmppc_free_host_rm_ops(); | 3928 | kvmppc_free_host_rm_ops(); |
3929 | if (kvmppc_radix_possible()) | ||
3930 | kvmppc_radix_exit(); | ||
3774 | kvmppc_hv_ops = NULL; | 3931 | kvmppc_hv_ops = NULL; |
3775 | } | 3932 | } |
3776 | 3933 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 5bb24be0b346..fe08fea54b70 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -29,6 +29,11 @@ | |||
29 | #include <asm/opal.h> | 29 | #include <asm/opal.h> |
30 | #include <asm/smp.h> | 30 | #include <asm/smp.h> |
31 | 31 | ||
32 | static bool in_realmode(void) | ||
33 | { | ||
34 | return !(mfmsr() & MSR_IR); | ||
35 | } | ||
36 | |||
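/*
 * MSR[IR] clear means instruction relocation is off, i.e. we were
 * entered in real mode.  With relocation-on interrupts (see the
 * .Lvirt_return path added to book3s_hv_rmhandlers.S in this patch)
 * these helpers can now also run in virtual mode, so the XICS/OPAL
 * accessors below choose the right flavour at run time.
 */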
32 | #define KVM_CMA_CHUNK_ORDER 18 | 37 | #define KVM_CMA_CHUNK_ORDER 18 |
33 | 38 | ||
34 | /* | 39 | /* |
@@ -200,7 +205,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val) | |||
200 | 205 | ||
201 | /* | 206 | /* |
202 | * Send an interrupt or message to another CPU. | 207 | * Send an interrupt or message to another CPU. |
203 | * This can only be called in real mode. | ||
204 | * The caller needs to include any barrier needed to order writes | 208 | * The caller needs to include any barrier needed to order writes |
205 | * to memory vs. the IPI/message. | 209 | * to memory vs. the IPI/message. |
206 | */ | 210 | */ |
@@ -226,7 +230,9 @@ void kvmhv_rm_send_ipi(int cpu) | |||
226 | 230 | ||
227 | /* Else poke the target with an IPI */ | 231 | /* Else poke the target with an IPI */ |
228 | xics_phys = paca[cpu].kvm_hstate.xics_phys; | 232 | xics_phys = paca[cpu].kvm_hstate.xics_phys; |
229 | if (xics_phys) | 233 | if (!in_realmode()) |
234 | opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); | ||
235 | else if (xics_phys) | ||
230 | rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); | 236 | rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); |
231 | else | 237 | else |
232 | opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), | 238 | opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), |
@@ -412,14 +418,15 @@ static long kvmppc_read_one_intr(bool *again) | |||
412 | 418 | ||
413 | /* Now read the interrupt from the ICP */ | 419 | /* Now read the interrupt from the ICP */ |
414 | xics_phys = local_paca->kvm_hstate.xics_phys; | 420 | xics_phys = local_paca->kvm_hstate.xics_phys; |
415 | if (!xics_phys) { | 421 | rc = 0; |
416 | /* Use OPAL to read the XIRR */ | 422 | if (!in_realmode()) |
423 | rc = opal_int_get_xirr(&xirr, false); | ||
424 | else if (!xics_phys) | ||
417 | rc = opal_rm_int_get_xirr(&xirr, false); | 425 | rc = opal_rm_int_get_xirr(&xirr, false); |
418 | if (rc < 0) | 426 | else |
419 | return 1; | ||
420 | } else { | ||
421 | xirr = _lwzcix(xics_phys + XICS_XIRR); | 427 | xirr = _lwzcix(xics_phys + XICS_XIRR); |
422 | } | 428 | if (rc < 0) |
429 | return 1; | ||
423 | 430 | ||
424 | /* | 431 | /* |
425 | * Save XIRR for later. Since we get control in reverse endian | 432 | * Save XIRR for later. Since we get control in reverse endian |
@@ -445,15 +452,19 @@ static long kvmppc_read_one_intr(bool *again) | |||
445 | * If it is an IPI, clear the MFRR and EOI it. | 452 | * If it is an IPI, clear the MFRR and EOI it. |
446 | */ | 453 | */ |
447 | if (xisr == XICS_IPI) { | 454 | if (xisr == XICS_IPI) { |
448 | if (xics_phys) { | 455 | rc = 0; |
456 | if (!in_realmode()) { | ||
457 | opal_int_set_mfrr(hard_smp_processor_id(), 0xff); | ||
458 | rc = opal_int_eoi(h_xirr); | ||
459 | } else if (xics_phys) { | ||
449 | _stbcix(xics_phys + XICS_MFRR, 0xff); | 460 | _stbcix(xics_phys + XICS_MFRR, 0xff); |
450 | _stwcix(xics_phys + XICS_XIRR, xirr); | 461 | _stwcix(xics_phys + XICS_XIRR, xirr); |
451 | } else { | 462 | } else { |
452 | opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); | 463 | opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); |
453 | rc = opal_rm_int_eoi(h_xirr); | 464 | rc = opal_rm_int_eoi(h_xirr); |
454 | /* If rc > 0, there is another interrupt pending */ | ||
455 | *again = rc > 0; | ||
456 | } | 465 | } |
466 | /* If rc > 0, there is another interrupt pending */ | ||
467 | *again = rc > 0; | ||
457 | 468 | ||
458 | /* | 469 | /* |
459 | * Need to ensure side effects of above stores | 470 | * Need to ensure side effects of above stores |
@@ -471,7 +482,10 @@ static long kvmppc_read_one_intr(bool *again) | |||
471 | /* We raced with the host, | 482 | /* We raced with the host, |
472 | * we need to resend that IPI, bummer | 483 | * we need to resend that IPI, bummer |
473 | */ | 484 | */ |
474 | if (xics_phys) | 485 | if (!in_realmode()) |
486 | opal_int_set_mfrr(hard_smp_processor_id(), | ||
487 | IPI_PRIORITY); | ||
488 | else if (xics_phys) | ||
475 | _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); | 489 | _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); |
476 | else | 490 | else |
477 | opal_rm_int_set_mfrr(hard_smp_processor_id(), | 491 | opal_rm_int_set_mfrr(hard_smp_processor_id(), |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9ef3c4be952f..b095afcd4309 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x) | |||
43 | static int global_invalidates(struct kvm *kvm, unsigned long flags) | 43 | static int global_invalidates(struct kvm *kvm, unsigned long flags) |
44 | { | 44 | { |
45 | int global; | 45 | int global; |
46 | int cpu; | ||
46 | 47 | ||
47 | /* | 48 | /* |
48 | * If there is only one vcore, and it's currently running, | 49 | * If there is only one vcore, and it's currently running, |
@@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) | |||
60 | /* any other core might now have stale TLB entries... */ | 61 | /* any other core might now have stale TLB entries... */ |
61 | smp_wmb(); | 62 | smp_wmb(); |
62 | cpumask_setall(&kvm->arch.need_tlb_flush); | 63 | cpumask_setall(&kvm->arch.need_tlb_flush); |
63 | cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu, | 64 | cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; |
64 | &kvm->arch.need_tlb_flush); | 65 | /* |
66 | * On POWER9, threads are independent but the TLB is shared, | ||
67 | * so use the bit for the first thread to represent the core. | ||
68 | */ | ||
69 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
70 | cpu = cpu_first_thread_sibling(cpu); | ||
71 | cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); | ||
65 | } | 72 | } |
66 | 73 | ||
67 | return global; | 74 | return global; |
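/*
 * Consumers of need_tlb_flush are assumed to apply the same mapping:
 * on POWER9 the bit for cpu_first_thread_sibling(cpu) stands for the
 * TLB shared by all threads of that core, matching radix_flush_cpu()
 * and kvmppc_start_thread() in book3s_hv.c.
 */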
@@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
182 | unsigned long mmu_seq; | 189 | unsigned long mmu_seq; |
183 | unsigned long rcbits, irq_flags = 0; | 190 | unsigned long rcbits, irq_flags = 0; |
184 | 191 | ||
192 | if (kvm_is_radix(kvm)) | ||
193 | return H_FUNCTION; | ||
185 | psize = hpte_page_size(pteh, ptel); | 194 | psize = hpte_page_size(pteh, ptel); |
186 | if (!psize) | 195 | if (!psize) |
187 | return H_PARAMETER; | 196 | return H_PARAMETER; |
@@ -458,6 +467,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | |||
458 | struct revmap_entry *rev; | 467 | struct revmap_entry *rev; |
459 | u64 pte, orig_pte, pte_r; | 468 | u64 pte, orig_pte, pte_r; |
460 | 469 | ||
470 | if (kvm_is_radix(kvm)) | ||
471 | return H_FUNCTION; | ||
461 | if (pte_index >= kvm->arch.hpt_npte) | 472 | if (pte_index >= kvm->arch.hpt_npte) |
462 | return H_PARAMETER; | 473 | return H_PARAMETER; |
463 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 474 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); |
@@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
529 | struct revmap_entry *rev, *revs[4]; | 540 | struct revmap_entry *rev, *revs[4]; |
530 | u64 hp0, hp1; | 541 | u64 hp0, hp1; |
531 | 542 | ||
543 | if (kvm_is_radix(kvm)) | ||
544 | return H_FUNCTION; | ||
532 | global = global_invalidates(kvm, 0); | 545 | global = global_invalidates(kvm, 0); |
533 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { | 546 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { |
534 | n = 0; | 547 | n = 0; |
@@ -642,6 +655,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
642 | unsigned long v, r, rb, mask, bits; | 655 | unsigned long v, r, rb, mask, bits; |
643 | u64 pte_v, pte_r; | 656 | u64 pte_v, pte_r; |
644 | 657 | ||
658 | if (kvm_is_radix(kvm)) | ||
659 | return H_FUNCTION; | ||
645 | if (pte_index >= kvm->arch.hpt_npte) | 660 | if (pte_index >= kvm->arch.hpt_npte) |
646 | return H_PARAMETER; | 661 | return H_PARAMETER; |
647 | 662 | ||
@@ -711,6 +726,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, | |||
711 | int i, n = 1; | 726 | int i, n = 1; |
712 | struct revmap_entry *rev = NULL; | 727 | struct revmap_entry *rev = NULL; |
713 | 728 | ||
729 | if (kvm_is_radix(kvm)) | ||
730 | return H_FUNCTION; | ||
714 | if (pte_index >= kvm->arch.hpt_npte) | 731 | if (pte_index >= kvm->arch.hpt_npte) |
715 | return H_PARAMETER; | 732 | return H_PARAMETER; |
716 | if (flags & H_READ_4) { | 733 | if (flags & H_READ_4) { |
@@ -750,6 +767,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, | |||
750 | unsigned long *rmap; | 767 | unsigned long *rmap; |
751 | long ret = H_NOT_FOUND; | 768 | long ret = H_NOT_FOUND; |
752 | 769 | ||
770 | if (kvm_is_radix(kvm)) | ||
771 | return H_FUNCTION; | ||
753 | if (pte_index >= kvm->arch.hpt_npte) | 772 | if (pte_index >= kvm->arch.hpt_npte) |
754 | return H_PARAMETER; | 773 | return H_PARAMETER; |
755 | 774 | ||
@@ -796,6 +815,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, | |||
796 | unsigned long *rmap; | 815 | unsigned long *rmap; |
797 | long ret = H_NOT_FOUND; | 816 | long ret = H_NOT_FOUND; |
798 | 817 | ||
818 | if (kvm_is_radix(kvm)) | ||
819 | return H_FUNCTION; | ||
799 | if (pte_index >= kvm->arch.hpt_npte) | 820 | if (pte_index >= kvm->arch.hpt_npte) |
800 | return H_PARAMETER; | 821 | return H_PARAMETER; |
801 | 822 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 44cfdd281fa1..0b2e388f4cdf 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
@@ -62,11 +62,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) | |||
62 | hcpu = hcore << threads_shift; | 62 | hcpu = hcore << threads_shift; |
63 | kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; | 63 | kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; |
64 | smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); | 64 | smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); |
65 | if (paca[hcpu].kvm_hstate.xics_phys) | 65 | kvmppc_set_host_ipi(hcpu, 1); |
66 | icp_native_cause_ipi_rm(hcpu); | 66 | smp_mb(); |
67 | else | 67 | kvmhv_rm_send_ipi(hcpu); |
68 | opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu), | ||
69 | IPI_PRIORITY); | ||
70 | } | 68 | } |
71 | #else | 69 | #else |
72 | static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } | 70 | static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } |
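The xics change drops the open-coded choice between poking the ICP directly and going through OPAL; kvmhv_rm_send_ipi() is expected to encapsulate that dispatch. A sketch of the assumed behaviour, mirroring the removed branch (the real helper may also use msgsnd doorbells where available):

	static void kvmhv_rm_send_ipi_sketch(int cpu)
	{
		unsigned long xics_phys = paca[cpu].kvm_hstate.xics_phys;

		if (xics_phys)
			icp_native_cause_ipi_rm(cpu);	/* write the ICP MFRR directly */
		else
			opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
					     IPI_PRIORITY);	/* firmware fallback */
	}

Note also the added kvmppc_set_host_ipi()/smp_mb() pair: the host-IPI flag must be visible before the IPI can be observed on the target CPU.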
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9338a818e05c..47414a6fe2dd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
148 | addi r1, r1, 112 | 148 | addi r1, r1, 112 |
149 | ld r7, HSTATE_HOST_MSR(r13) | 149 | ld r7, HSTATE_HOST_MSR(r13) |
150 | 150 | ||
151 | /* | ||
152 | * If we came back from the guest via a relocation-on interrupt, | ||
153 | * we will be in virtual mode at this point, which makes it a | ||
154 | * little easier to get back to the caller. | ||
155 | */ | ||
156 | mfmsr r0 | ||
157 | andi. r0, r0, MSR_IR /* in real mode? */ | ||
158 | bne .Lvirt_return | ||
159 | |||
151 | cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK | 160 | cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK |
152 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | 161 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL |
153 | beq 11f | 162 | beq 11f |
@@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
181 | mtspr SPRN_HSRR1, r7 | 190 | mtspr SPRN_HSRR1, r7 |
182 | ba 0xe80 | 191 | ba 0xe80 |
183 | 192 | ||
193 | /* Virtual-mode return - can't get here for HMI or machine check */ | ||
194 | .Lvirt_return: | ||
195 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | ||
196 | beq 16f | ||
197 | cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL | ||
198 | beq 17f | ||
199 | andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */ | ||
200 | beq 18f | ||
201 | mtmsrd r7, 1 /* if so then re-enable them */ | ||
202 | 18: mtlr r8 | ||
203 | blr | ||
204 | |||
205 | 16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */ | ||
206 | mtspr SPRN_HSRR1, r7 | ||
207 | b exc_virt_0x4500_hardware_interrupt | ||
208 | |||
209 | 17: mtspr SPRN_HSRR0, r8 | ||
210 | mtspr SPRN_HSRR1, r7 | ||
211 | b exc_virt_0x4e80_h_doorbell | ||
212 | |||
184 | kvmppc_primary_no_guest: | 213 | kvmppc_primary_no_guest: |
185 | /* We handle this much like a ceded vcpu */ | 214 | /* We handle this much like a ceded vcpu */ |
186 | /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ | 215 | /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ |
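A relocation-on interrupt from the guest (possible on POWER9) leaves the CPU in virtual mode, so the MSR_IR test above detects that case and takes the new .Lvirt_return path instead of going back through real mode. In C-like pseudocode (illustration only; the authoritative code is the assembly above):

	if (trap == BOOK3S_INTERRUPT_EXTERNAL)
		goto exc_virt_0x4500_hardware_interrupt;	/* reloc-on vector */
	if (trap == BOOK3S_INTERRUPT_H_DOORBELL)
		goto exc_virt_0x4e80_h_doorbell;
	if (host_msr & MSR_EE)
		hard_irq_enable();		/* mtmsrd r7,1 */
	return_to_link_register();		/* mtlr r8; blr */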
@@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
518 | /* Stack frame offsets */ | 547 | /* Stack frame offsets */ |
519 | #define STACK_SLOT_TID (112-16) | 548 | #define STACK_SLOT_TID (112-16) |
520 | #define STACK_SLOT_PSSCR (112-24) | 549 | #define STACK_SLOT_PSSCR (112-24) |
550 | #define STACK_SLOT_PID (112-32) | ||
521 | 551 | ||
522 | .global kvmppc_hv_entry | 552 | .global kvmppc_hv_entry |
523 | kvmppc_hv_entry: | 553 | kvmppc_hv_entry: |
@@ -530,6 +560,7 @@ kvmppc_hv_entry: | |||
530 | * R1 = host R1 | 560 | * R1 = host R1 |
531 | * R2 = TOC | 561 | * R2 = TOC |
532 | * all other volatile GPRS = free | 562 | * all other volatile GPRS = free |
563 | * Does not preserve non-volatile GPRs or CR fields | ||
533 | */ | 564 | */ |
534 | mflr r0 | 565 | mflr r0 |
535 | std r0, PPC_LR_STKOFF(r1) | 566 | std r0, PPC_LR_STKOFF(r1) |
@@ -549,32 +580,38 @@ kvmppc_hv_entry: | |||
549 | bl kvmhv_start_timing | 580 | bl kvmhv_start_timing |
550 | 1: | 581 | 1: |
551 | #endif | 582 | #endif |
552 | /* Clear out SLB */ | 583 | |
584 | /* Use cr7 as an indication of radix mode */ | ||
585 | ld r5, HSTATE_KVM_VCORE(r13) | ||
586 | ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ | ||
587 | lbz r0, KVM_RADIX(r9) | ||
588 | cmpwi cr7, r0, 0 | ||
589 | |||
590 | /* Clear out SLB if hash */ | ||
591 | bne cr7, 2f | ||
553 | li r6,0 | 592 | li r6,0 |
554 | slbmte r6,r6 | 593 | slbmte r6,r6 |
555 | slbia | 594 | slbia |
556 | ptesync | 595 | ptesync |
557 | 596 | 2: | |
558 | /* | 597 | /* |
559 | * POWER7/POWER8 host -> guest partition switch code. | 598 | * POWER7/POWER8 host -> guest partition switch code. |
560 | * We don't have to lock against concurrent tlbies, | 599 | * We don't have to lock against concurrent tlbies, |
561 | * but we do have to coordinate across hardware threads. | 600 | * but we do have to coordinate across hardware threads. |
562 | */ | 601 | */ |
563 | /* Set bit in entry map iff exit map is zero. */ | 602 | /* Set bit in entry map iff exit map is zero. */ |
564 | ld r5, HSTATE_KVM_VCORE(r13) | ||
565 | li r7, 1 | 603 | li r7, 1 |
566 | lbz r6, HSTATE_PTID(r13) | 604 | lbz r6, HSTATE_PTID(r13) |
567 | sld r7, r7, r6 | 605 | sld r7, r7, r6 |
568 | addi r9, r5, VCORE_ENTRY_EXIT | 606 | addi r8, r5, VCORE_ENTRY_EXIT |
569 | 21: lwarx r3, 0, r9 | 607 | 21: lwarx r3, 0, r8 |
570 | cmpwi r3, 0x100 /* any threads starting to exit? */ | 608 | cmpwi r3, 0x100 /* any threads starting to exit? */ |
571 | bge secondary_too_late /* if so we're too late to the party */ | 609 | bge secondary_too_late /* if so we're too late to the party */ |
572 | or r3, r3, r7 | 610 | or r3, r3, r7 |
573 | stwcx. r3, 0, r9 | 611 | stwcx. r3, 0, r8 |
574 | bne 21b | 612 | bne 21b |
575 | 613 | ||
576 | /* Primary thread switches to guest partition. */ | 614 | /* Primary thread switches to guest partition. */ |
577 | ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ | ||
578 | cmpwi r6,0 | 615 | cmpwi r6,0 |
579 | bne 10f | 616 | bne 10f |
580 | lwz r7,KVM_LPID(r9) | 617 | lwz r7,KVM_LPID(r9) |
@@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) | |||
590 | 627 | ||
591 | /* See if we need to flush the TLB */ | 628 | /* See if we need to flush the TLB */ |
592 | lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ | 629 | lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ |
630 | BEGIN_FTR_SECTION | ||
631 | /* | ||
632 | * On POWER9, individual threads can come in here, but the | ||
633 | * TLB is shared between the 4 threads in a core, hence | ||
634 | * invalidating on one thread invalidates for all. | ||
635 | * Thus we make all 4 threads use the same bit here. | ||
636 | */ | ||
637 | clrrdi r6,r6,2 | ||
638 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
593 | clrldi r7,r6,64-6 /* extract bit number (6 bits) */ | 639 | clrldi r7,r6,64-6 /* extract bit number (6 bits) */ |
594 | srdi r6,r6,6 /* doubleword number */ | 640 | srdi r6,r6,6 /* doubleword number */ |
595 | sldi r6,r6,3 /* address offset */ | 641 | sldi r6,r6,3 /* address offset */ |
596 | add r6,r6,r9 | 642 | add r6,r6,r9 |
597 | addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ | 643 | addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ |
598 | li r0,1 | 644 | li r8,1 |
599 | sld r0,r0,r7 | 645 | sld r8,r8,r7 |
600 | ld r7,0(r6) | 646 | ld r7,0(r6) |
601 | and. r7,r7,r0 | 647 | and. r7,r7,r8 |
602 | beq 22f | 648 | beq 22f |
603 | 23: ldarx r7,0,r6 /* if set, clear the bit */ | ||
604 | andc r7,r7,r0 | ||
605 | stdcx. r7,0,r6 | ||
606 | bne 23b | ||
607 | /* Flush the TLB of any entries for this LPID */ | 649 | /* Flush the TLB of any entries for this LPID */ |
608 | lwz r6,KVM_TLB_SETS(r9) | 650 | lwz r0,KVM_TLB_SETS(r9) |
609 | li r0,0 /* RS for P9 version of tlbiel */ | 651 | mtctr r0 |
610 | mtctr r6 | ||
611 | li r7,0x800 /* IS field = 0b10 */ | 652 | li r7,0x800 /* IS field = 0b10 */ |
612 | ptesync | 653 | ptesync |
613 | 28: tlbiel r7 | 654 | li r0,0 /* RS for P9 version of tlbiel */ |
655 | bne cr7, 29f | ||
656 | 28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ | ||
614 | addi r7,r7,0x1000 | 657 | addi r7,r7,0x1000 |
615 | bdnz 28b | 658 | bdnz 28b |
616 | ptesync | 659 | b 30f |
660 | 29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */ | ||
661 | addi r7,r7,0x1000 | ||
662 | bdnz 29b | ||
663 | 30: ptesync | ||
664 | 23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ | ||
665 | andc r7,r7,r8 | ||
666 | stdcx. r7,0,r6 | ||
667 | bne 23b | ||
617 | 668 | ||
618 | /* Add timebase offset onto timebase */ | 669 | /* Add timebase offset onto timebase */ |
619 | 22: ld r8,VCORE_TB_OFFSET(r5) | 670 | 22: ld r8,VCORE_TB_OFFSET(r5) |
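Two things change in the flush logic above. First, on POWER9 the four threads of a core share one TLB, so clrrdi r6,r6,2 rounds the CPU number down to the first thread of the core and all four threads test (and clear) the same bit in kvm->arch.need_tlb_flush. Second, the bit is now cleared after the tlbiel loop rather than before it, and the loop gains a radix variant (RIC=2, PRS=1, R=1). A C sketch of the logic, with flush_guest_tlb() as an illustrative placeholder for the tlbiel loop:

	int bit = local_paca->paca_index;

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		bit &= ~3;		/* one bit per core on POWER9 */
	if (cpumask_test_cpu(bit, &kvm->arch.need_tlb_flush)) {
		flush_guest_tlb(kvm);	/* tlbiel loop, hash or radix form */
		cpumask_clear_cpu(bit, &kvm->arch.need_tlb_flush);
	}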
@@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
658 | beq kvmppc_primary_no_guest | 709 | beq kvmppc_primary_no_guest |
659 | kvmppc_got_guest: | 710 | kvmppc_got_guest: |
660 | 711 | ||
661 | /* Load up guest SLB entries */ | 712 | /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ |
662 | lwz r5,VCPU_SLB_MAX(r4) | 713 | lwz r5,VCPU_SLB_MAX(r4) |
663 | cmpwi r5,0 | 714 | cmpwi r5,0 |
664 | beq 9f | 715 | beq 9f |
@@ -696,8 +747,10 @@ kvmppc_got_guest: | |||
696 | BEGIN_FTR_SECTION | 747 | BEGIN_FTR_SECTION |
697 | mfspr r5, SPRN_TIDR | 748 | mfspr r5, SPRN_TIDR |
698 | mfspr r6, SPRN_PSSCR | 749 | mfspr r6, SPRN_PSSCR |
750 | mfspr r7, SPRN_PID | ||
699 | std r5, STACK_SLOT_TID(r1) | 751 | std r5, STACK_SLOT_TID(r1) |
700 | std r6, STACK_SLOT_PSSCR(r1) | 752 | std r6, STACK_SLOT_PSSCR(r1) |
753 | std r7, STACK_SLOT_PID(r1) | ||
701 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | 754 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) |
702 | 755 | ||
703 | BEGIN_FTR_SECTION | 756 | BEGIN_FTR_SECTION |
@@ -824,6 +877,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) | |||
824 | mtspr SPRN_PID, r7 | 877 | mtspr SPRN_PID, r7 |
825 | mtspr SPRN_WORT, r8 | 878 | mtspr SPRN_WORT, r8 |
826 | BEGIN_FTR_SECTION | 879 | BEGIN_FTR_SECTION |
880 | PPC_INVALIDATE_ERAT | ||
881 | END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) | ||
882 | BEGIN_FTR_SECTION | ||
827 | /* POWER8-only registers */ | 883 | /* POWER8-only registers */ |
828 | ld r5, VCPU_TCSCR(r4) | 884 | ld r5, VCPU_TCSCR(r4) |
829 | ld r6, VCPU_ACOP(r4) | 885 | ld r6, VCPU_ACOP(r4) |
@@ -1057,13 +1113,13 @@ hdec_soon: | |||
1057 | kvmppc_interrupt_hv: | 1113 | kvmppc_interrupt_hv: |
1058 | /* | 1114 | /* |
1059 | * Register contents: | 1115 | * Register contents: |
1060 | * R12 = interrupt vector | 1116 | * R12 = (guest CR << 32) | interrupt vector |
1061 | * R13 = PACA | 1117 | * R13 = PACA |
1062 | * guest CR, R12 saved in shadow VCPU SCRATCH1/0 | 1118 | * guest R12 saved in shadow VCPU SCRATCH0 |
1119 | * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE | ||
1063 | * guest R13 saved in SPRN_SCRATCH0 | 1120 | * guest R13 saved in SPRN_SCRATCH0 |
1064 | */ | 1121 | */ |
1065 | std r9, HSTATE_SCRATCH2(r13) | 1122 | std r9, HSTATE_SCRATCH2(r13) |
1066 | |||
1067 | lbz r9, HSTATE_IN_GUEST(r13) | 1123 | lbz r9, HSTATE_IN_GUEST(r13) |
1068 | cmpwi r9, KVM_GUEST_MODE_HOST_HV | 1124 | cmpwi r9, KVM_GUEST_MODE_HOST_HV |
1069 | beq kvmppc_bad_host_intr | 1125 | beq kvmppc_bad_host_intr |
@@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv: | |||
1094 | std r10, VCPU_GPR(R10)(r9) | 1150 | std r10, VCPU_GPR(R10)(r9) |
1095 | std r11, VCPU_GPR(R11)(r9) | 1151 | std r11, VCPU_GPR(R11)(r9) |
1096 | ld r3, HSTATE_SCRATCH0(r13) | 1152 | ld r3, HSTATE_SCRATCH0(r13) |
1097 | lwz r4, HSTATE_SCRATCH1(r13) | ||
1098 | std r3, VCPU_GPR(R12)(r9) | 1153 | std r3, VCPU_GPR(R12)(r9) |
1154 | /* CR is in the high half of r12 */ | ||
1155 | srdi r4, r12, 32 | ||
1099 | stw r4, VCPU_CR(r9) | 1156 | stw r4, VCPU_CR(r9) |
1100 | BEGIN_FTR_SECTION | 1157 | BEGIN_FTR_SECTION |
1101 | ld r3, HSTATE_CFAR(r13) | 1158 | ld r3, HSTATE_CFAR(r13) |
@@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | |||
1114 | mfspr r11, SPRN_SRR1 | 1171 | mfspr r11, SPRN_SRR1 |
1115 | std r10, VCPU_SRR0(r9) | 1172 | std r10, VCPU_SRR0(r9) |
1116 | std r11, VCPU_SRR1(r9) | 1173 | std r11, VCPU_SRR1(r9) |
1174 | /* trap is in the low half of r12, clear CR from the high half */ | ||
1175 | clrldi r12, r12, 32 | ||
1117 | andi. r0, r12, 2 /* need to read HSRR0/1? */ | 1176 | andi. r0, r12, 2 /* need to read HSRR0/1? */ |
1118 | beq 1f | 1177 | beq 1f |
1119 | mfspr r10, SPRN_HSRR0 | 1178 | mfspr r10, SPRN_HSRR0 |
@@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | |||
1149 | 11: stw r3,VCPU_HEIR(r9) | 1208 | 11: stw r3,VCPU_HEIR(r9) |
1150 | 1209 | ||
1151 | /* these are volatile across C function calls */ | 1210 | /* these are volatile across C function calls */ |
1211 | #ifdef CONFIG_RELOCATABLE | ||
1212 | ld r3, HSTATE_SCRATCH1(r13) | ||
1213 | mtctr r3 | ||
1214 | #else | ||
1152 | mfctr r3 | 1215 | mfctr r3 |
1216 | #endif | ||
1153 | mfxer r4 | 1217 | mfxer r4 |
1154 | std r3, VCPU_CTR(r9) | 1218 | std r3, VCPU_CTR(r9) |
1155 | std r4, VCPU_XER(r9) | 1219 | std r4, VCPU_XER(r9) |
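The entry convention changes here: R12 now arrives as (guest CR << 32) | trap, and with CONFIG_RELOCATABLE the guest CTR is parked in HSTATE_SCRATCH1 because the relocatable trampoline needs CTR for its indirect branch. Packing and unpacking, written as plain C for clarity:

	u64 r12 = ((u64)guest_cr << 32) | trap;	/* built at interrupt entry */

	u32 cr = r12 >> 32;		/* srdi r4, r12, 32; stw r4, VCPU_CR(r9) */
	r12   &= 0xffffffffull;		/* clrldi r12, r12, 32: trap id remains */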
@@ -1285,11 +1349,15 @@ mc_cont: | |||
1285 | mtspr SPRN_CTRLT,r6 | 1349 | mtspr SPRN_CTRLT,r6 |
1286 | 4: | 1350 | 4: |
1287 | /* Read the guest SLB and save it away */ | 1351 | /* Read the guest SLB and save it away */ |
1352 | ld r5, VCPU_KVM(r9) | ||
1353 | lbz r0, KVM_RADIX(r5) | ||
1354 | cmpwi r0, 0 | ||
1355 | li r5, 0 | ||
1356 | bne 3f /* for radix, save 0 entries */ | ||
1288 | lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ | 1357 | lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ |
1289 | mtctr r0 | 1358 | mtctr r0 |
1290 | li r6,0 | 1359 | li r6,0 |
1291 | addi r7,r9,VCPU_SLB | 1360 | addi r7,r9,VCPU_SLB |
1292 | li r5,0 | ||
1293 | 1: slbmfee r8,r6 | 1361 | 1: slbmfee r8,r6 |
1294 | andis. r0,r8,SLB_ESID_V@h | 1362 | andis. r0,r8,SLB_ESID_V@h |
1295 | beq 2f | 1363 | beq 2f |
@@ -1301,7 +1369,7 @@ mc_cont: | |||
1301 | addi r5,r5,1 | 1369 | addi r5,r5,1 |
1302 | 2: addi r6,r6,1 | 1370 | 2: addi r6,r6,1 |
1303 | bdnz 1b | 1371 | bdnz 1b |
1304 | stw r5,VCPU_SLB_MAX(r9) | 1372 | 3: stw r5,VCPU_SLB_MAX(r9) |
1305 | 1373 | ||
1306 | /* | 1374 | /* |
1307 | * Save the guest PURR/SPURR | 1375 | * Save the guest PURR/SPURR |
@@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
1550 | BEGIN_FTR_SECTION | 1618 | BEGIN_FTR_SECTION |
1551 | ld r5, STACK_SLOT_TID(r1) | 1619 | ld r5, STACK_SLOT_TID(r1) |
1552 | ld r6, STACK_SLOT_PSSCR(r1) | 1620 | ld r6, STACK_SLOT_PSSCR(r1) |
1621 | ld r7, STACK_SLOT_PID(r1) | ||
1553 | mtspr SPRN_TIDR, r5 | 1622 | mtspr SPRN_TIDR, r5 |
1554 | mtspr SPRN_PSSCR, r6 | 1623 | mtspr SPRN_PSSCR, r6 |
1624 | mtspr SPRN_PID, r7 | ||
1555 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | 1625 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) |
1626 | BEGIN_FTR_SECTION | ||
1627 | PPC_INVALIDATE_ERAT | ||
1628 | END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) | ||
1556 | 1629 | ||
1557 | /* | 1630 | /* |
1558 | * POWER7/POWER8 guest -> host partition switch code. | 1631 | * POWER7/POWER8 guest -> host partition switch code. |
@@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
1663 | isync | 1736 | isync |
1664 | 1737 | ||
1665 | /* load host SLB entries */ | 1738 | /* load host SLB entries */ |
1739 | BEGIN_MMU_FTR_SECTION | ||
1740 | b 0f | ||
1741 | END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) | ||
1666 | ld r8,PACA_SLBSHADOWPTR(r13) | 1742 | ld r8,PACA_SLBSHADOWPTR(r13) |
1667 | 1743 | ||
1668 | .rept SLB_NUM_BOLTED | 1744 | .rept SLB_NUM_BOLTED |
@@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
1675 | slbmte r6,r5 | 1751 | slbmte r6,r5 |
1676 | 1: addi r8,r8,16 | 1752 | 1: addi r8,r8,16 |
1677 | .endr | 1753 | .endr |
1678 | 1754 | 0: | |
1679 | #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING | 1755 | #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING |
1680 | /* Finish timing, if we have a vcpu */ | 1756 | /* Finish timing, if we have a vcpu */ |
1681 | ld r4, HSTATE_KVM_VCPU(r13) | 1757 | ld r4, HSTATE_KVM_VCPU(r13) |
@@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
1702 | * reflect the HDSI to the guest as a DSI. | 1778 | * reflect the HDSI to the guest as a DSI. |
1703 | */ | 1779 | */ |
1704 | kvmppc_hdsi: | 1780 | kvmppc_hdsi: |
1781 | ld r3, VCPU_KVM(r9) | ||
1782 | lbz r0, KVM_RADIX(r3) | ||
1783 | cmpwi r0, 0 | ||
1705 | mfspr r4, SPRN_HDAR | 1784 | mfspr r4, SPRN_HDAR |
1706 | mfspr r6, SPRN_HDSISR | 1785 | mfspr r6, SPRN_HDSISR |
1786 | bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ | ||
1707 | /* HPTE not found fault or protection fault? */ | 1787 | /* HPTE not found fault or protection fault? */ |
1708 | andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h | 1788 | andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h |
1709 | beq 1f /* if not, send it to the guest */ | 1789 | beq 1f /* if not, send it to the guest */ |
1790 | BEGIN_FTR_SECTION | ||
1791 | mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ | ||
1792 | b 4f | ||
1793 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
1710 | andi. r0, r11, MSR_DR /* data relocation enabled? */ | 1794 | andi. r0, r11, MSR_DR /* data relocation enabled? */ |
1711 | beq 3f | 1795 | beq 3f |
1712 | clrrdi r0, r4, 28 | 1796 | clrrdi r0, r4, 28 |
@@ -1776,13 +1860,29 @@ fast_interrupt_c_return: | |||
1776 | stb r0, HSTATE_IN_GUEST(r13) | 1860 | stb r0, HSTATE_IN_GUEST(r13) |
1777 | b guest_exit_cont | 1861 | b guest_exit_cont |
1778 | 1862 | ||
1863 | .Lradix_hdsi: | ||
1864 | std r4, VCPU_FAULT_DAR(r9) | ||
1865 | stw r6, VCPU_FAULT_DSISR(r9) | ||
1866 | .Lradix_hisi: | ||
1867 | mfspr r5, SPRN_ASDR | ||
1868 | std r5, VCPU_FAULT_GPA(r9) | ||
1869 | b guest_exit_cont | ||
1870 | |||
1779 | /* | 1871 | /* |
1780 | * Similarly for an HISI, reflect it to the guest as an ISI unless | 1872 | * Similarly for an HISI, reflect it to the guest as an ISI unless |
1781 | * it is an HPTE not found fault for a page that we have paged out. | 1873 | * it is an HPTE not found fault for a page that we have paged out. |
1782 | */ | 1874 | */ |
1783 | kvmppc_hisi: | 1875 | kvmppc_hisi: |
1876 | ld r3, VCPU_KVM(r9) | ||
1877 | lbz r0, KVM_RADIX(r3) | ||
1878 | cmpwi r0, 0 | ||
1879 | bne .Lradix_hisi /* for radix, just save ASDR */ | ||
1784 | andis. r0, r11, SRR1_ISI_NOPT@h | 1880 | andis. r0, r11, SRR1_ISI_NOPT@h |
1785 | beq 1f | 1881 | beq 1f |
1882 | BEGIN_FTR_SECTION | ||
1883 | mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ | ||
1884 | b 4f | ||
1885 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
1786 | andi. r0, r11, MSR_IR /* instruction relocation enabled? */ | 1886 | andi. r0, r11, MSR_IR /* instruction relocation enabled? */ |
1787 | beq 3f | 1887 | beq 3f |
1788 | clrrdi r0, r10, 28 | 1888 | clrrdi r0, r10, 28 |
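For radix guests the HDSI/HISI handlers skip the HPT-specific triage entirely: there is no software HPT search to do, so the code just captures the fault state — HDAR/HDSISR for data faults plus the POWER9 ASDR, which holds the guest real address — and exits to the C fault handler. Roughly, with field names following the VCPU_FAULT_* offsets used above:

	vcpu->arch.fault_dar   = mfspr(SPRN_HDAR);	/* data-side only */
	vcpu->arch.fault_dsisr = mfspr(SPRN_HDSISR);
	vcpu->arch.fault_gpa   = mfspr(SPRN_ASDR);	/* guest real address */
	/* ... then exit to C via guest_exit_cont ... */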
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index ca8f174289bb..2a2b96d53999 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S | |||
@@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end: | |||
167 | * * | 167 | * * |
168 | *****************************************************************************/ | 168 | *****************************************************************************/ |
169 | 169 | ||
170 | .global kvmppc_handler_trampoline_exit | ||
171 | kvmppc_handler_trampoline_exit: | ||
172 | |||
173 | .global kvmppc_interrupt_pr | 170 | .global kvmppc_interrupt_pr |
174 | kvmppc_interrupt_pr: | 171 | kvmppc_interrupt_pr: |
172 | /* 64-bit entry. Register usage at this point: | ||
173 | * | ||
174 | * SPRG_SCRATCH0 = guest R13 | ||
175 | * R12 = (guest CR << 32) | exit handler id | ||
176 | * R13 = PACA | ||
177 | * HSTATE.SCRATCH0 = guest R12 | ||
178 | * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE | ||
179 | */ | ||
180 | #ifdef CONFIG_PPC64 | ||
181 | /* Match 32-bit entry */ | ||
182 | #ifdef CONFIG_RELOCATABLE | ||
183 | std r9, HSTATE_SCRATCH2(r13) | ||
184 | ld r9, HSTATE_SCRATCH1(r13) | ||
185 | mtctr r9 | ||
186 | ld r9, HSTATE_SCRATCH2(r13) | ||
187 | #endif | ||
188 | rotldi r12, r12, 32 /* Flip R12 halves for stw */ | ||
189 | stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ | ||
190 | srdi r12, r12, 32 /* shift trap into low half */ | ||
191 | #endif | ||
175 | 192 | ||
193 | .global kvmppc_handler_trampoline_exit | ||
194 | kvmppc_handler_trampoline_exit: | ||
176 | /* Register usage at this point: | 195 | /* Register usage at this point: |
177 | * | 196 | * |
178 | * SPRG_SCRATCH0 = guest R13 | 197 | * SPRG_SCRATCH0 = guest R13 |
179 | * R12 = exit handler id | 198 | * R12 = exit handler id |
180 | * R13 = shadow vcpu (32-bit) or PACA (64-bit) | 199 | * R13 = shadow vcpu (32-bit) or PACA (64-bit) |
181 | * HSTATE.SCRATCH0 = guest R12 | 200 | * HSTATE.SCRATCH0 = guest R12 |
182 | * HSTATE.SCRATCH1 = guest CR | 201 | * HSTATE.SCRATCH1 = guest CR |
183 | * | ||
184 | */ | 202 | */ |
185 | 203 | ||
186 | /* Save registers */ | 204 | /* Save registers */ |
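PR KVM's 64-bit entry now has to match the HV convention above, so it unpacks the same (CR << 32) | exit-id layout. The rotldi flips the halves so a 32-bit stw lands the CR in HSTATE_SCRATCH1, then the shift restores the exit id; as arithmetic:

	u64 r12 = ((u64)guest_cr << 32) | exit_id;

	r12 = (r12 << 32) | (r12 >> 32);	/* rotldi r12, r12, 32 */
	scratch1 = (u32)r12;			/* stw: low half is now CR */
	r12 >>= 32;				/* srdi: exit id back in low half */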
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cd892dec7cb6..40a5b2d75ed1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
565 | case KVM_CAP_PPC_HWRNG: | 565 | case KVM_CAP_PPC_HWRNG: |
566 | r = kvmppc_hwrng_present(); | 566 | r = kvmppc_hwrng_present(); |
567 | break; | 567 | break; |
568 | case KVM_CAP_PPC_MMU_RADIX: | ||
569 | r = !!(hv_enabled && radix_enabled()); | ||
570 | break; | ||
571 | case KVM_CAP_PPC_MMU_HASH_V3: | ||
572 | r = !!(hv_enabled && !radix_enabled() && | ||
573 | cpu_has_feature(CPU_FTR_ARCH_300)); | ||
574 | break; | ||
568 | #endif | 575 | #endif |
569 | case KVM_CAP_SYNC_MMU: | 576 | case KVM_CAP_SYNC_MMU: |
570 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 577 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
@@ -1468,6 +1475,31 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
1468 | r = kvm_vm_ioctl_rtas_define_token(kvm, argp); | 1475 | r = kvm_vm_ioctl_rtas_define_token(kvm, argp); |
1469 | break; | 1476 | break; |
1470 | } | 1477 | } |
1478 | case KVM_PPC_CONFIGURE_V3_MMU: { | ||
1479 | struct kvm *kvm = filp->private_data; | ||
1480 | struct kvm_ppc_mmuv3_cfg cfg; | ||
1481 | |||
1482 | r = -EINVAL; | ||
1483 | if (!kvm->arch.kvm_ops->configure_mmu) | ||
1484 | goto out; | ||
1485 | r = -EFAULT; | ||
1486 | if (copy_from_user(&cfg, argp, sizeof(cfg))) | ||
1487 | goto out; | ||
1488 | r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg); | ||
1489 | break; | ||
1490 | } | ||
1491 | case KVM_PPC_GET_RMMU_INFO: { | ||
1492 | struct kvm *kvm = filp->private_data; | ||
1493 | struct kvm_ppc_rmmu_info info; | ||
1494 | |||
1495 | r = -EINVAL; | ||
1496 | if (!kvm->arch.kvm_ops->get_rmmu_info) | ||
1497 | goto out; | ||
1498 | r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info); | ||
1499 | if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) | ||
1500 | r = -EFAULT; | ||
1501 | break; | ||
1502 | } | ||
1471 | default: { | 1503 | default: { |
1472 | struct kvm *kvm = filp->private_data; | 1504 | struct kvm *kvm = filp->private_data; |
1473 | r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); | 1505 | r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); |
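The two new vm ioctls dispatch through kvm_ops so that only the HV implementation provides them. From userspace the flow is: check the capability, then configure. A hedged sketch (error handling trimmed; vm_fd is an open VM descriptor, and process_table must already be in the partition-table second-doubleword format):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int configure_radix(int vm_fd, __u64 process_table)
	{
		struct kvm_ppc_mmuv3_cfg cfg;

		if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MMU_RADIX) <= 0)
			return -1;	/* host cannot run radix guests */

		cfg.flags = KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE;
		cfg.process_table = process_table;
		return ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
	}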
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index a175cd82ae8c..2be5dc242832 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c | |||
@@ -41,6 +41,7 @@ static void pmd_ctor(void *addr) | |||
41 | } | 41 | } |
42 | 42 | ||
43 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; | 43 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; |
44 | EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ | ||
44 | 45 | ||
45 | /* | 46 | /* |
46 | * Create a kmem_cache() for pagetables. This is not used for PTE | 47 | * Create a kmem_cache() for pagetables. This is not used for PTE |
@@ -82,7 +83,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) | |||
82 | pgtable_cache[shift - 1] = new; | 83 | pgtable_cache[shift - 1] = new; |
83 | pr_debug("Allocated pgtable cache for order %d\n", shift); | 84 | pr_debug("Allocated pgtable cache for order %d\n", shift); |
84 | } | 85 | } |
85 | 86 | EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */ | |
86 | 87 | ||
87 | void pgtable_cache_init(void) | 88 | void pgtable_cache_init(void) |
88 | { | 89 | { |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 93abf8a9813d..10c9a545a646 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
@@ -42,6 +42,8 @@ | |||
42 | #include <linux/memblock.h> | 42 | #include <linux/memblock.h> |
43 | #include <linux/hugetlb.h> | 43 | #include <linux/hugetlb.h> |
44 | #include <linux/slab.h> | 44 | #include <linux/slab.h> |
45 | #include <linux/of_fdt.h> | ||
46 | #include <linux/libfdt.h> | ||
45 | 47 | ||
46 | #include <asm/pgalloc.h> | 48 | #include <asm/pgalloc.h> |
47 | #include <asm/page.h> | 49 | #include <asm/page.h> |
@@ -344,12 +346,45 @@ static int __init parse_disable_radix(char *p) | |||
344 | } | 346 | } |
345 | early_param("disable_radix", parse_disable_radix); | 347 | early_param("disable_radix", parse_disable_radix); |
346 | 348 | ||
349 | /* | ||
350 | * If we're running under a hypervisor, we need to check the contents of | ||
351 | * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do | ||
352 | * radix. If not, we clear the radix feature bit so we fall back to hash. | ||
353 | */ | ||
354 | static void early_check_vec5(void) | ||
355 | { | ||
356 | unsigned long root, chosen; | ||
357 | int size; | ||
358 | const u8 *vec5; | ||
359 | |||
360 | root = of_get_flat_dt_root(); | ||
361 | chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); | ||
362 | if (chosen == -FDT_ERR_NOTFOUND) | ||
363 | return; | ||
364 | vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); | ||
365 | if (!vec5) | ||
366 | return; | ||
367 | if (size <= OV5_INDX(OV5_MMU_RADIX_300) || | ||
368 | !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) | ||
369 | /* Hypervisor doesn't support radix */ | ||
370 | cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; | ||
371 | } | ||
372 | |||
347 | void __init mmu_early_init_devtree(void) | 373 | void __init mmu_early_init_devtree(void) |
348 | { | 374 | { |
349 | /* Disable radix mode based on kernel command line. */ | 375 | /* Disable radix mode based on kernel command line. */ |
350 | if (disable_radix) | 376 | if (disable_radix) |
351 | cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; | 377 | cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; |
352 | 378 | ||
379 | /* | ||
380 | * Check /chosen/ibm,architecture-vec-5 if running as a guest. | ||
381 | * When running bare-metal, we can use radix if we like | ||
382 | * even though the ibm,architecture-vec-5 property created by | ||
383 | * skiboot doesn't have the necessary bits set. | ||
384 | */ | ||
385 | if (early_radix_enabled() && !(mfmsr() & MSR_HV)) | ||
386 | early_check_vec5(); | ||
387 | |||
353 | if (early_radix_enabled()) | 388 | if (early_radix_enabled()) |
354 | radix__early_init_devtree(); | 389 | radix__early_init_devtree(); |
355 | else | 390 | else |
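early_check_vec5() indexes the ibm,architecture-vec-5 byte array with OV5_INDX() and tests a bit with OV5_FEAT(); the size check guards against a vector too short to contain the radix byte at all. Under the usual definitions (an assumption here, since the macros are not part of this diff), a feature constant packs a byte index and a bit number:

	#define OV5_INDX(x)	((x) >> 3)		/* byte offset into vec5 */
	#define OV5_FEAT(x)	(0x80 >> ((x) & 0x7))	/* bit 0 is the byte's MSB */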
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfa53ccc8baf..94323c4ececc 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c | |||
@@ -401,6 +401,8 @@ void __init radix__early_init_mmu(void) | |||
401 | mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); | 401 | mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); |
402 | radix_init_partition_table(); | 402 | radix_init_partition_table(); |
403 | radix_init_amor(); | 403 | radix_init_amor(); |
404 | } else { | ||
405 | radix_init_pseries(); | ||
404 | } | 406 | } |
405 | 407 | ||
406 | memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); | 408 | memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); |
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 8bca7f58afc4..d6b5e5cde412 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
@@ -454,13 +454,23 @@ void __init mmu_partition_table_init(void) | |||
454 | void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, | 454 | void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, |
455 | unsigned long dw1) | 455 | unsigned long dw1) |
456 | { | 456 | { |
457 | unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); | ||
458 | |||
457 | partition_tb[lpid].patb0 = cpu_to_be64(dw0); | 459 | partition_tb[lpid].patb0 = cpu_to_be64(dw0); |
458 | partition_tb[lpid].patb1 = cpu_to_be64(dw1); | 460 | partition_tb[lpid].patb1 = cpu_to_be64(dw1); |
459 | 461 | ||
460 | /* Global flush of TLBs and partition table caches for this lpid */ | 462 | /* |
463 | * Global flush of TLBs and partition table caches for this lpid. | ||
464 | * The type of flush (hash or radix) depends on what the previous | ||
465 | * use of this partition ID was, not the new use. | ||
466 | */ | ||
461 | asm volatile("ptesync" : : : "memory"); | 467 | asm volatile("ptesync" : : : "memory"); |
462 | asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : | 468 | if (old & PATB_HR) |
463 | "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); | 469 | asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : |
470 | "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); | ||
471 | else | ||
472 | asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : | ||
473 | "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); | ||
464 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | 474 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); |
465 | } | 475 | } |
466 | EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); | 476 | EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); |
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index ea7f09bd73b1..7d67623203b8 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c | |||
@@ -126,7 +126,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) | |||
126 | index = OV5_INDX(vec5_fw_features_table[i].feature); | 126 | index = OV5_INDX(vec5_fw_features_table[i].feature); |
127 | feat = OV5_FEAT(vec5_fw_features_table[i].feature); | 127 | feat = OV5_FEAT(vec5_fw_features_table[i].feature); |
128 | 128 | ||
129 | if (vec5[index] & feat) | 129 | if (index < len && (vec5[index] & feat)) |
130 | powerpc_firmware_features |= | 130 | powerpc_firmware_features |= |
131 | vec5_fw_features_table[i].val; | 131 | vec5_fw_features_table[i].val; |
132 | } | 132 | } |
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 5dc1c3c6e716..0587655aea69 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -609,6 +609,29 @@ static int __init disable_bulk_remove(char *str) | |||
609 | 609 | ||
610 | __setup("bulk_remove=", disable_bulk_remove); | 610 | __setup("bulk_remove=", disable_bulk_remove); |
611 | 611 | ||
612 | /* Actually only used for radix, so far */ | ||
613 | static int pseries_lpar_register_process_table(unsigned long base, | ||
614 | unsigned long page_size, unsigned long table_size) | ||
615 | { | ||
616 | long rc; | ||
617 | unsigned long flags = PROC_TABLE_NEW; | ||
618 | |||
619 | if (radix_enabled()) | ||
620 | flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; | ||
621 | for (;;) { | ||
622 | rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, | ||
623 | page_size, table_size); | ||
624 | if (!H_IS_LONG_BUSY(rc)) | ||
625 | break; | ||
626 | mdelay(get_longbusy_msecs(rc)); | ||
627 | } | ||
628 | if (rc != H_SUCCESS) { | ||
629 | pr_err("Failed to register process table (rc=%ld)\n", rc); | ||
630 | BUG(); | ||
631 | } | ||
632 | return rc; | ||
633 | } | ||
634 | |||
612 | void __init hpte_init_pseries(void) | 635 | void __init hpte_init_pseries(void) |
613 | { | 636 | { |
614 | mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; | 637 | mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; |
@@ -622,6 +645,12 @@ void __init hpte_init_pseries(void) | |||
622 | mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; | 645 | mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; |
623 | } | 646 | } |
624 | 647 | ||
648 | void radix_init_pseries(void) | ||
649 | { | ||
650 | pr_info("Using radix MMU under hypervisor\n"); | ||
651 | register_process_table = pseries_lpar_register_process_table; | ||
652 | } | ||
653 | |||
625 | #ifdef CONFIG_PPC_SMLPAR | 654 | #ifdef CONFIG_PPC_SMLPAR |
626 | #define CMO_FREE_HINT_DEFAULT 1 | 655 | #define CMO_FREE_HINT_DEFAULT 1 |
627 | static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; | 656 | static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; |
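H_REGISTER_PROC_TBL, like many PAPR hcalls, may return a "long busy" status carrying a hint of how long to wait; the loop above sleeps for that interval and retries until the hypervisor gives a final answer. The pattern generalizes (an illustrative helper, not from this commit):

	static long plpar_hcall_retry(unsigned long opcode, unsigned long a0,
				      unsigned long a1, unsigned long a2,
				      unsigned long a3)
	{
		long rc;

		do {
			rc = plpar_hcall_norets(opcode, a0, a1, a2, a3);
			if (H_IS_LONG_BUSY(rc))
				mdelay(get_longbusy_msecs(rc));	/* hypervisor's hint */
		} while (H_IS_LONG_BUSY(rc));
		return rc;
	}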
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cac48eda1075..e0035808c814 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -871,6 +871,8 @@ struct kvm_ppc_smmu_info { | |||
871 | #define KVM_CAP_S390_USER_INSTR0 130 | 871 | #define KVM_CAP_S390_USER_INSTR0 130 |
872 | #define KVM_CAP_MSI_DEVID 131 | 872 | #define KVM_CAP_MSI_DEVID 131 |
873 | #define KVM_CAP_PPC_HTM 132 | 873 | #define KVM_CAP_PPC_HTM 132 |
874 | #define KVM_CAP_PPC_MMU_RADIX 134 | ||
875 | #define KVM_CAP_PPC_MMU_HASH_V3 135 | ||
874 | 876 | ||
875 | #ifdef KVM_CAP_IRQ_ROUTING | 877 | #ifdef KVM_CAP_IRQ_ROUTING |
876 | 878 | ||
@@ -1187,6 +1189,10 @@ struct kvm_s390_ucas_mapping { | |||
1187 | #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) | 1189 | #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) |
1188 | /* Available with KVM_CAP_PPC_RTAS */ | 1190 | /* Available with KVM_CAP_PPC_RTAS */ |
1189 | #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) | 1191 | #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) |
1192 | /* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */ | ||
1193 | #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) | ||
1194 | /* Available with KVM_CAP_PPC_MMU_RADIX */ | ||
1195 | #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) | ||
1190 | 1196 | ||
1191 | /* ioctl for vm fd */ | 1197 | /* ioctl for vm fd */ |
1192 | #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) | 1198 | #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) |
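With the ioctl numbers in place, userspace can query the supported radix geometries before building its page tables. A minimal sketch (the kvm_ppc_rmmu_info layout comes from the uapi header and the api.txt section added by this merge):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdio.h>

	static int query_rmmu(int vm_fd, struct kvm_ppc_rmmu_info *info)
	{
		if (ioctl(vm_fd, KVM_PPC_GET_RMMU_INFO, info) < 0) {
			perror("KVM_PPC_GET_RMMU_INFO");
			return -1;
		}
		/* info now lists the supported radix tree geometries and the
		 * AP-field encodings to use with tlbie. */
		return 0;
	}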