author    Linus Torvalds <torvalds@linux-foundation.org>  2016-05-19 14:27:09 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-05-19 14:27:09 -0400
commit    7beaa24ba49717419e24d1f6321e8b3c265a719c (patch)
tree      a5c5433d3c7bfc4c23e67174463ccf519c8406f0 /arch/arm/kvm
parent    07b75260ebc2c789724c594d7eaf0194fa47b3be (diff)
parent    9842df62004f366b9fed2423e24df10542ee0dc5 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "Small release overall.

  x86:
   - miscellaneous fixes
   - AVIC support (local APIC virtualization, AMD version)

  s390:
   - polling for interrupts after a VCPU goes to halted state is now
     enabled for s390
   - use hardware provided information about facility bits that do not
     need any hypervisor activity, and other fixes for cpu models and
     facilities
   - improve perf output
   - floating interrupt controller improvements.

  MIPS:
   - miscellaneous fixes

  PPC:
   - bugfixes only

  ARM:
   - 16K page size support
   - generic firmware probing layer for timer and GIC

  Christoffer Dall (KVM-ARM maintainer) says:
    "There are a few changes in this pull request touching things
     outside KVM, but they should all carry the necessary acks and it
     made the merge process much easier to do it this way."

  though actually the irqchip maintainers' acks didn't make it into the
  patches. Marc Zyngier, who is both irqchip and KVM-ARM maintainer,
  later acked at http://mid.gmane.org/573351D1.4060303@arm.com
  ('more formally and for documentation purposes')"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (82 commits)
  KVM: MTRR: remove MSR 0x2f8
  KVM: x86: make hwapic_isr_update and hwapic_irr_update look the same
  svm: Manage vcpu load/unload when enable AVIC
  svm: Do not intercept CR8 when enable AVIC
  svm: Do not expose x2APIC when enable AVIC
  KVM: x86: Introducing kvm_x86_ops.apicv_post_state_restore
  svm: Add VMEXIT handlers for AVIC
  svm: Add interrupt injection via AVIC
  KVM: x86: Detect and Initialize AVIC support
  svm: Introduce new AVIC VMCB registers
  KVM: split kvm_vcpu_wake_up from kvm_vcpu_kick
  KVM: x86: Introducing kvm_x86_ops VCPU blocking/unblocking hooks
  KVM: x86: Introducing kvm_x86_ops VM init/destroy hooks
  KVM: x86: Rename kvm_apic_get_reg to kvm_lapic_get_reg
  KVM: x86: Misc LAPIC changes to expose helper functions
  KVM: shrink halt polling even more for invalid wakeups
  KVM: s390: set halt polling to 80 microseconds
  KVM: halt_polling: provide a way to qualify wakeups during poll
  KVM: PPC: Book3S HV: Re-enable XICS fast path for irqfd-generated interrupts
  kvm: Conditionally register IRQ bypass consumer
  ...
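The halt-polling entries in the shortlog ("shrink halt polling even more for invalid wakeups", "provide a way to qualify wakeups during poll") refer to the per-VCPU polling window that KVM grows after a useful poll and shrinks after a wasted one. A rough userspace model of that policy follows; the constants and helper names here are illustrative only and are not the kernel's code.

/* Simplified model of KVM's adaptive halt-polling window (illustrative only). */
#include <stdio.h>

#define POLL_NS_MAX	500000	/* cap, analogous to the halt_poll_ns parameter */
#define POLL_NS_GROW	2	/* multiply the window after a useful poll */

struct vcpu_model {
	unsigned long poll_ns;	/* current per-VCPU polling window */
};

/*
 * valid_wakeup: the event that ended the poll was a "real" wakeup.
 * The change in this release is that wakeups which do not qualify as
 * valid no longer grow the window, so a VCPU polling for nothing backs off.
 */
static void update_poll_window(struct vcpu_model *v, int woke_while_polling,
			       int valid_wakeup)
{
	if (woke_while_polling && valid_wakeup) {
		v->poll_ns = v->poll_ns ? v->poll_ns * POLL_NS_GROW : 10000;
		if (v->poll_ns > POLL_NS_MAX)
			v->poll_ns = POLL_NS_MAX;
	} else if (!valid_wakeup) {
		v->poll_ns = 0;		/* wasted poll: collapse the window */
	}
}

int main(void)
{
	struct vcpu_model v = { .poll_ns = 0 };

	update_poll_window(&v, 1, 1);
	printf("after useful poll: %lu ns\n", v.poll_ns);
	update_poll_window(&v, 1, 0);
	printf("after wasted poll: %lu ns\n", v.poll_ns);
	return 0;
}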
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--  arch/arm/kvm/arm.c    2
-rw-r--r--  arch/arm/kvm/mmu.c  408
2 files changed, 222 insertions(+), 188 deletions(-)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9ef013d86cc5..237d5d82f0af 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -444,7 +444,7 @@ static void update_vttbr(struct kvm *kvm)
 	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
+	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
 	kvm->arch.vttbr = pgd_phys | vmid;
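The hunk above builds the VTTBR value programmed on guest entry: the stage-2 pgd physical address in the low bits and the VMID in a field starting at bit 48, which is why the pgd now comes straight from kvm->arch.pgd. A minimal standalone sketch of that packing follows; the 48-bit shift and 8-bit VMID are assumed values for illustration, not read out of this diff.

/* Standalone sketch of the VTTBR packing done in update_vttbr() above. */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define VMID_SHIFT	48
#define VMID_BITS	8	/* kvm_vmid_bits is 8 or 16 depending on hardware */
#define VMID_MASK	(((UINT64_C(1) << VMID_BITS) - 1) << VMID_SHIFT)
#define BADDR_MASK	((UINT64_C(1) << VMID_SHIFT) - 1)	/* simplified */

static uint64_t make_vttbr(uint64_t pgd_phys, uint64_t vmid)
{
	/* pgd_phys must fit in the baddr field, mirroring the BUG_ON above */
	if (pgd_phys & ~BADDR_MASK)
		return 0;
	return pgd_phys | ((vmid << VMID_SHIFT) & VMID_MASK);
}

int main(void)
{
	uint64_t vttbr = make_vttbr(0x40000000, 5);
	printf("vttbr = 0x%" PRIx64 "\n", vttbr);	/* 0x5000040000000 */
	return 0;
}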
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index be302128c5d7..45c43aecb8f2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -43,11 +43,9 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
-#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
-#define kvm_pud_huge(_x)	pud_huge(_x)
-
 #define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
 #define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
 
@@ -69,14 +67,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-	/*
-	 * This function also gets called when dealing with HYP page
-	 * tables. As HYP doesn't have an associated struct kvm (and
-	 * the HYP page tables are fairly static), we don't do
-	 * anything there.
-	 */
-	if (kvm)
-		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 /*
@@ -115,7 +106,7 @@ static bool kvm_is_device_pfn(unsigned long pfn)
  */
 static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
 {
-	if (!kvm_pmd_huge(*pmd))
+	if (!pmd_thp_or_huge(*pmd))
 		return;
 
 	pmd_clear(pmd);
@@ -155,29 +146,29 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
-	pgd_clear(pgd);
+	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
+	stage2_pgd_clear(pgd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pud_free(NULL, pud_table);
+	stage2_pud_free(pud_table);
 	put_page(virt_to_page(pgd));
 }
 
-static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	VM_BUG_ON(pud_huge(*pud));
-	pud_clear(pud);
+	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
+	VM_BUG_ON(stage2_pud_huge(*pud));
+	stage2_pud_clear(pud);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pmd_free(NULL, pmd_table);
+	stage2_pmd_free(pmd_table);
 	put_page(virt_to_page(pud));
 }
 
-static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	VM_BUG_ON(kvm_pmd_huge(*pmd));
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
 	pmd_clear(pmd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pte_free_kernel(NULL, pte_table);
@@ -204,7 +195,7 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
  */
-static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t start_addr = addr;
@@ -226,21 +217,21 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(kvm, start_pte))
-		clear_pmd_entry(kvm, pmd, start_addr);
+	if (stage2_pte_table_empty(start_pte))
+		clear_stage2_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
 	pmd_t *pmd, *start_pmd;
 
-	start_pmd = pmd = pmd_offset(pud, addr);
+	start_pmd = pmd = stage2_pmd_offset(pud, addr);
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
 				pmd_t old_pmd = *pmd;
 
 				pmd_clear(pmd);
@@ -250,57 +241,64 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 
 			put_page(virt_to_page(pmd));
 		} else {
-			unmap_ptes(kvm, pmd, addr, next);
+			unmap_stage2_ptes(kvm, pmd, addr, next);
 		}
 	}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(kvm, start_pmd))
-		clear_pud_entry(kvm, pud, start_addr);
+	if (stage2_pmd_table_empty(start_pmd))
+		clear_stage2_pud_entry(kvm, pud, start_addr);
 }
 
-static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
 	pud_t *pud, *start_pud;
 
-	start_pud = pud = pud_offset(pgd, addr);
+	start_pud = pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud)) {
 				pud_t old_pud = *pud;
 
-				pud_clear(pud);
+				stage2_pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
-
 				kvm_flush_dcache_pud(old_pud);
-
 				put_page(virt_to_page(pud));
 			} else {
-				unmap_pmds(kvm, pud, addr, next);
+				unmap_stage2_pmds(kvm, pud, addr, next);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(kvm, start_pud))
-		clear_pgd_entry(kvm, pgd, start_addr);
+	if (stage2_pud_table_empty(start_pud))
+		clear_stage2_pgd_entry(kvm, pgd, start_addr);
 }
 
-
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			phys_addr_t start, u64 size)
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm:   The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size:  The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 {
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
-		next = kvm_pgd_addr_end(addr, end);
-		if (!pgd_none(*pgd))
-			unmap_puds(kvm, pgd, addr, next);
+		next = stage2_pgd_addr_end(addr, end);
+		if (!stage2_pgd_none(*pgd))
+			unmap_stage2_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
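The unmap_stage2_* helpers above all follow one pattern: walk a level with the stage2_*_addr_end() helpers, recurse into the next level, and when a table page ends up with no live entries, clear the parent entry and free the table. A compressed two-level userspace model of that shape follows; it uses plain arrays rather than real page tables, and every name in it is illustrative.

/* Two-level model of the "walk, unmap, free empty tables" pattern above. */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 4		/* entries per table level (tiny, for the model) */
#define LEAF_SPAN 1		/* addresses covered by one leaf entry */
#define TABLE_SPAN (ENTRIES * LEAF_SPAN)

struct leaf_table { int pte[ENTRIES]; };
struct top_table  { struct leaf_table *next[ENTRIES]; };

static int leaf_empty(struct leaf_table *t)
{
	for (int i = 0; i < ENTRIES; i++)
		if (t->pte[i])
			return 0;
	return 1;
}

static void unmap_leaves(struct top_table *top, int idx, int addr, int end)
{
	struct leaf_table *t = top->next[idx];

	for (; addr < end; addr += LEAF_SPAN)
		t->pte[(addr % TABLE_SPAN) / LEAF_SPAN] = 0;	/* clear entry; real code also flushes the TLB */

	if (leaf_empty(t)) {			/* mirrors stage2_pte_table_empty() */
		top->next[idx] = NULL;		/* mirrors clear_stage2_pmd_entry() */
		free(t);
	}
}

static void unmap_range_model(struct top_table *top, int start, int size)
{
	int addr = start, end = start + size;

	while (addr < end) {
		int next = (addr / TABLE_SPAN + 1) * TABLE_SPAN;	/* like stage2_pmd_addr_end() */
		if (next > end)
			next = end;
		if (top->next[addr / TABLE_SPAN])
			unmap_leaves(top, addr / TABLE_SPAN, addr, next);
		addr = next;
	}
}

int main(void)
{
	struct top_table top = { { NULL } };

	top.next[0] = calloc(1, sizeof(struct leaf_table));
	top.next[0]->pte[1] = 1;
	top.next[0]->pte[2] = 1;

	unmap_range_model(&top, 0, TABLE_SPAN);
	printf("leaf table 0 freed: %s\n", top.next[0] ? "no" : "yes");
	return 0;
}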
@@ -322,11 +320,11 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(pud, addr);
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd))
+			if (pmd_thp_or_huge(*pmd))
 				kvm_flush_dcache_pmd(*pmd);
 			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
@@ -340,11 +338,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = pud_offset(pgd, addr);
+	pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
-			if (pud_huge(*pud))
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud))
 				kvm_flush_dcache_pud(*pud);
 			else
 				stage2_flush_pmds(kvm, pud, addr, next);
@@ -360,9 +358,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
-		next = kvm_pgd_addr_end(addr, end);
+		next = stage2_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
@@ -391,6 +389,100 @@ static void stage2_flush_vm(struct kvm *kvm)
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
+static void clear_hyp_pgd_entry(pgd_t *pgd)
+{
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
+	pgd_clear(pgd);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
+}
+
+static void clear_hyp_pud_entry(pud_t *pud)
+{
+	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}
+
+static void clear_hyp_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (hyp_pte_table_empty(start_pte))
+		clear_hyp_pmd_entry(pmd);
+}
+
+static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pmd_t *pmd, *start_pmd;
+
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		/* Hyp doesn't use huge pmds */
+		if (!pmd_none(*pmd))
+			unmap_hyp_ptes(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+
+	if (hyp_pmd_table_empty(start_pmd))
+		clear_hyp_pud_entry(pud);
+}
+
+static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		/* Hyp doesn't use huge puds */
+		if (!pud_none(*pud))
+			unmap_hyp_pmds(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	if (hyp_pud_table_empty(start_pud))
+		clear_hyp_pgd_entry(pgd);
+}
+
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	/*
+	 * We don't unmap anything from HYP, except at the hyp tear down.
+	 * Hence, we don't have to invalidate the TLBs here.
+	 */
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (!pgd_none(*pgd))
+			unmap_hyp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
@@ -401,14 +493,14 @@ void free_boot_hyp_pgd(void)
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
-		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
-		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
@@ -433,9 +525,9 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
@@ -645,20 +737,6 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
-/* Free the HW pgd, one page at a time */
-static void kvm_free_hwpgd(void *hwpgd)
-{
-	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
-}
-
-/* Allocate the HW PGD, making sure that each page gets its own refcount */
-static void *kvm_alloc_hwpgd(void)
-{
-	unsigned int size = kvm_get_hwpgd_size();
-
-	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
-}
-
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -673,81 +751,22 @@ static void *kvm_alloc_hwpgd(void)
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
 	pgd_t *pgd;
-	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
-	hwpgd = kvm_alloc_hwpgd();
-	if (!hwpgd)
+	/* Allocate the HW PGD, making sure that each page gets its own refcount */
+	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+	if (!pgd)
 		return -ENOMEM;
 
-	/* When the kernel uses more levels of page tables than the
-	 * guest, we allocate a fake PGD and pre-populate it to point
-	 * to the next-level page table, which will be the real
-	 * initial page table pointed to by the VTTBR.
-	 *
-	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
-	 * the PMD and the kernel will use folded pud.
-	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
-	 * pages.
-	 */
-	if (KVM_PREALLOC_LEVEL > 0) {
-		int i;
-
-		/*
-		 * Allocate fake pgd for the page table manipulation macros to
-		 * work.  This is not used by the hardware and we have no
-		 * alignment requirement for this allocation.
-		 */
-		pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
-			      GFP_KERNEL | __GFP_ZERO);
-
-		if (!pgd) {
-			kvm_free_hwpgd(hwpgd);
-			return -ENOMEM;
-		}
-
-		/* Plug the HW PGD into the fake one. */
-		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-			if (KVM_PREALLOC_LEVEL == 1)
-				pgd_populate(NULL, pgd + i,
-					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-			else if (KVM_PREALLOC_LEVEL == 2)
-				pud_populate(NULL, pud_offset(pgd, 0) + i,
-					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-		}
-	} else {
-		/*
-		 * Allocate actual first-level Stage-2 page table used by the
-		 * hardware for Stage-2 page table walks.
-		 */
-		pgd = (pgd_t *)hwpgd;
-	}
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
 }
 
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm:   The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size:  The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
-{
-	unmap_range(kvm, kvm->arch.pgd, start, size);
-}
-
 static void stage2_unmap_memslot(struct kvm *kvm,
 				 struct kvm_memory_slot *memslot)
 {
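With this change the stage-2 PGD is sized as S2_PGD_SIZE = PTRS_PER_S2_PGD * sizeof(pgd_t) and allocated with alloc_pages_exact(), so every backing page carries its own refcount and the fake-PGD/KVM_PREALLOC_LEVEL plumbing disappears. As a worked example under assumed parameters (a 40-bit IPA space, 4K granule, 8-byte descriptors; none of these are stated in this hunk): the starting level's entries each cover 1GB, so PTRS_PER_S2_PGD = 2^(40-30) = 1024 and S2_PGD_SIZE = 1024 * 8 = 8192 bytes, i.e. two concatenated pages.

/* Worked example of the S2_PGD_SIZE computation (illustrative parameters). */
#include <stdio.h>

int main(void)
{
	unsigned int ipa_bits = 40;		/* assumed KVM_PHYS_SHIFT */
	unsigned int entry_shift = 30;		/* each top-level entry covers 1GB at 4K granule */
	unsigned long ptrs_per_s2_pgd = 1UL << (ipa_bits - entry_shift);
	unsigned long s2_pgd_size = ptrs_per_s2_pgd * 8;	/* assumed sizeof(pgd_t) == 8 */

	printf("PTRS_PER_S2_PGD = %lu\n", ptrs_per_s2_pgd);	/* 1024 */
	printf("S2_PGD_SIZE     = %lu bytes (%lu pages)\n",
	       s2_pgd_size, s2_pgd_size / 4096);		/* 8192 bytes, 2 pages */
	return 0;
}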
@@ -830,10 +849,8 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(kvm->arch.pgd);
-
+	/* Free the HW pgd, one page at a time */
+	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
 	kvm->arch.pgd = NULL;
 }
 
@@ -843,16 +860,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
-	if (WARN_ON(pgd_none(*pgd))) {
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	if (WARN_ON(stage2_pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
 		pud = mmu_memory_cache_alloc(cache);
-		pgd_populate(NULL, pgd, pud);
+		stage2_pgd_populate(pgd, pud);
 		get_page(virt_to_page(pgd));
 	}
 
-	return pud_offset(pgd, addr);
+	return stage2_pud_offset(pgd, addr);
 }
 
 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -862,15 +879,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pmd_t *pmd;
 
 	pud = stage2_get_pud(kvm, cache, addr);
-	if (pud_none(*pud)) {
+	if (stage2_pud_none(*pud)) {
 		if (!cache)
 			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
-		pud_populate(NULL, pud, pmd);
+		stage2_pud_populate(pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	return pmd_offset(pud, addr);
+	return stage2_pmd_offset(pud, addr);
 }
 
 static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -893,11 +910,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
 
 	old_pmd = *pmd;
-	kvm_set_pmd(pmd, *new_pmd);
-	if (pmd_present(old_pmd))
+	if (pmd_present(old_pmd)) {
+		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	else
+	} else {
 		get_page(virt_to_page(pmd));
+	}
+
+	kvm_set_pmd(pmd, *new_pmd);
 	return 0;
 }
 
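The reordering above turns the huge-PMD update into a break-before-make sequence: when a valid entry is already installed, it is cleared and the TLB invalidated for that IPA before the new entry is written, rather than writing first and flushing afterwards. A compilable stub sketch of that ordering follows; the types and helpers are stand-ins, not the kernel API.

/* Stand-alone sketch of the break-before-make ordering used above. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t pmd_t;			/* stand-in for the kernel type */
#define PMD_PRESENT	(UINT64_C(1) << 0)

static void tlb_flush_ipa(uint64_t ipa)
{
	printf("  tlb flush for ipa 0x%llx\n", (unsigned long long)ipa);
}

static void set_pmd_huge(pmd_t *pmd, pmd_t new_pmd, uint64_t ipa)
{
	pmd_t old_pmd = *pmd;

	if (old_pmd & PMD_PRESENT) {
		*pmd = 0;		/* break: clear the live entry first   */
		tlb_flush_ipa(ipa);	/* ...and invalidate before re-mapping */
	} else {
		/* first mapping of this entry: real code takes a page ref here */
	}

	*pmd = new_pmd;			/* make: install the new translation */
}

int main(void)
{
	pmd_t pmd = 0;
	uint64_t ipa = 0x80200000;

	set_pmd_huge(&pmd, 0x40000000 | PMD_PRESENT, ipa);	/* no flush */
	set_pmd_huge(&pmd, 0x80000000 | PMD_PRESENT, ipa);	/* break, flush, make */
	return 0;
}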
@@ -946,15 +966,38 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 
 	/* Create 2nd stage page table mapping - Level 3 */
 	old_pte = *pte;
-	kvm_set_pte(pte, *new_pte);
-	if (pte_present(old_pte))
+	if (pte_present(old_pte)) {
+		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	else
+	} else {
 		get_page(virt_to_page(pte));
+	}
 
+	kvm_set_pte(pte, *new_pte);
 	return 0;
 }
 
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	if (pte_young(*pte)) {
+		*pte = pte_mkold(*pte);
+		return 1;
+	}
+	return 0;
+}
+#else
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	return __ptep_test_and_clear_young(pte);
+}
+#endif
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
+}
+
 /**
  * kvm_phys_addr_ioremap - map a device range to guest IPA
  *
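The new stage2_ptep_test_and_clear_young()/stage2_pmdp_test_and_clear_young() helpers centralize the "was this entry accessed since the last scan, and clear the flag" check that the aging handlers further down now call directly. A plain-C model of that semantic follows; the flag bit and type are illustrative, not the hardware access flag layout.

/* Minimal model of test-and-clear-young semantics (illustrative). */
#include <stdio.h>
#include <stdint.h>

#define PTE_YOUNG	(UINT64_C(1) << 10)	/* stand-in for the accessed flag */

typedef uint64_t pte_t;

static int test_and_clear_young(pte_t *pte)
{
	if (*pte & PTE_YOUNG) {
		*pte &= ~PTE_YOUNG;	/* page was touched: age it and report it */
		return 1;
	}
	return 0;			/* already old: nothing to do */
}

int main(void)
{
	pte_t pte = 0x40000000 | PTE_YOUNG;

	printf("first scan:  %d\n", test_and_clear_young(&pte));	/* 1 */
	printf("second scan: %d\n", test_and_clear_young(&pte));	/* 0 */
	return 0;
}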
@@ -978,7 +1021,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
 		if (writable)
-			kvm_set_s2pte_writable(&pte);
+			pte = kvm_s2pte_mkwrite(pte);
 
 		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
 						KVM_NR_MEM_OBJS);
@@ -1078,12 +1121,12 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(pud, addr);
 
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
 				if (!kvm_s2pmd_readonly(pmd))
 					kvm_set_s2pmd_readonly(pmd);
 			} else {
@@ -1106,12 +1149,12 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = pud_offset(pgd, addr);
+	pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
 			/* TODO:PUD not supported, revisit later if supported */
-			BUG_ON(kvm_pud_huge(*pud));
+			BUG_ON(stage2_pud_huge(*pud));
 			stage2_wp_pmds(pud, addr, next);
 		}
 	} while (pud++, addr = next, addr != end);
@@ -1128,7 +1171,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1140,8 +1183,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 			cond_resched_lock(&kvm->mmu_lock);
 
-		next = kvm_pgd_addr_end(addr, end);
-		if (pgd_present(*pgd))
+		next = stage2_pgd_addr_end(addr, end);
+		if (stage2_pgd_present(*pgd))
 			stage2_wp_puds(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
@@ -1320,7 +1363,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
-			kvm_set_s2pmd_writable(&new_pmd);
+			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
@@ -1329,7 +1372,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 
 		if (writable) {
-			kvm_set_s2pte_writable(&new_pte);
+			new_pte = kvm_s2pte_mkwrite(new_pte);
 			kvm_set_pfn_dirty(pfn);
 			mark_page_dirty(kvm, gfn);
 		}
@@ -1348,6 +1391,8 @@ out_unlock:
  * Resolve the access fault by making the page young again.
  * Note that because the faulting entry is guaranteed not to be
  * cached in the TLB, we don't need to invalidate anything.
+ * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
+ * so there is no need for atomic (pte|pmd)_mkyoung operations.
  */
 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 {
@@ -1364,7 +1409,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		goto out;
 
-	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+	if (pmd_thp_or_huge(*pmd)) {	/* THP, HugeTLB */
 		*pmd = pmd_mkyoung(*pmd);
 		pfn = pmd_pfn(*pmd);
 		pfn_valid = true;
@@ -1588,25 +1633,14 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
-	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
-		if (pmd_young(*pmd)) {
-			*pmd = pmd_mkold(*pmd);
-			return 1;
-		}
-
-		return 0;
-	}
+	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
+		return stage2_pmdp_test_and_clear_young(pmd);
 
 	pte = pte_offset_kernel(pmd, gpa);
 	if (pte_none(*pte))
 		return 0;
 
-	if (pte_young(*pte)) {
-		*pte = pte_mkold(*pte);	/* Just a page... */
-		return 1;
-	}
-
-	return 0;
+	return stage2_ptep_test_and_clear_young(pte);
 }
 
 static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
@@ -1618,7 +1652,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
-	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
+	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
 		return pmd_young(*pmd);
 
 	pte = pte_offset_kernel(pmd, gpa);