author     Linus Torvalds <torvalds@linux-foundation.org>   2012-12-13 18:31:08 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-12-13 18:31:08 -0500
commit     66cdd0ceaf65a18996f561b770eedde1d123b019 (patch)
tree       4892eaa422d366fce5d1e866ff1fe0988af95569 /arch/x86/kvm/paging_tmpl.h
parent     896ea17d3da5f44b2625c9cda9874d7dfe447393 (diff)
parent     58b7825bc324da55415034a9f6ca5d716b8fd898 (diff)
Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
"Considerable KVM/PPC work, x86 kvmclock vsyscall support,
IA32_TSC_ADJUST MSR emulation, amongst others."
Fix up trivial conflict in kernel/sched/core.c due to cross-cpu
migration notifier added next to rq migration call-back.
* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
KVM: emulator: fix real mode segment checks in address linearization
VMX: remove unneeded enable_unrestricted_guest check
KVM: VMX: fix DPL during entry to protected mode
x86/kexec: crash_vmclear_local_vmcss needs __rcu
kvm: Fix irqfd resampler list walk
KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
KVM: MMU: optimize for set_spte
KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
KVM: PPC: bookehv: Add guest computation mode for irq delivery
KVM: PPC: Make EPCR a valid field for booke64 and bookehv
KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
KVM: PPC: e500: Add emulation helper for getting instruction ea
KVM: PPC: bookehv64: Add support for interrupt handling
KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
KVM: PPC: booke: Fix get_tb() compile error on 64-bit
KVM: PPC: e500: Silence bogus GCC warning in tlb code
...
Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 115
1 file changed, 41 insertions(+), 74 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 714e2c01a6fe..891eb6d93b8b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
                                         addr, access);
 }
 
-static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
-                                   struct kvm_mmu_page *sp, u64 *spte,
-                                   pt_element_t gpte)
+static bool
+FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+                    u64 *spte, pt_element_t gpte, bool no_dirty_log)
 {
-       if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
-               goto no_present;
-
-       if (!is_present_gpte(gpte))
-               goto no_present;
-
-       if (!(gpte & PT_ACCESSED_MASK))
-               goto no_present;
-
-       return false;
-
-no_present:
-       drop_spte(vcpu->kvm, spte);
-       return true;
-}
-
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-                             u64 *spte, const void *pte)
-{
-       pt_element_t gpte;
        unsigned pte_access;
+       gfn_t gfn;
        pfn_t pfn;
 
-       gpte = *(const pt_element_t *)pte;
-       if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
-               return;
+       if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
+               return false;
 
        pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
+
+       gfn = gpte_to_gfn(gpte);
        pte_access = sp->role.access & gpte_access(vcpu, gpte);
        protect_clean_gpte(&pte_access, gpte);
-       pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-       if (mmu_invalid_pfn(pfn))
-               return;
+       pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+                       no_dirty_log && (pte_access & ACC_WRITE_MASK));
+       if (is_error_pfn(pfn))
+               return false;
 
        /*
-        * we call mmu_set_spte() with host_writable = true because that
-        * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+        * we call mmu_set_spte() with host_writable = true because
+        * pte_prefetch_gfn_to_pfn always gets a writable pfn.
         */
        mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-                    NULL, PT_PAGE_TABLE_LEVEL,
-                    gpte_to_gfn(gpte), pfn, true, true);
+                    NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
+
+       return true;
+}
+
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+                             u64 *spte, const void *pte)
+{
+       pt_element_t gpte = *(const pt_element_t *)pte;
+
+       FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
 }
 
 static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
        spte = sp->spt + i;
 
        for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
-               pt_element_t gpte;
-               unsigned pte_access;
-               gfn_t gfn;
-               pfn_t pfn;
-
                if (spte == sptep)
                        continue;
 
                if (is_shadow_present_pte(*spte))
                        continue;
 
-               gpte = gptep[i];
-
-               if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
-                       continue;
-
-               pte_access = sp->role.access & gpte_access(vcpu, gpte);
-               protect_clean_gpte(&pte_access, gpte);
-               gfn = gpte_to_gfn(gpte);
-               pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
-                                     pte_access & ACC_WRITE_MASK);
-               if (mmu_invalid_pfn(pfn))
+               if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
                        break;
-
-               mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-                            NULL, PT_PAGE_TABLE_LEVEL, gfn,
-                            pfn, true, true);
        }
 }
 
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
+ * If the guest tries to write a write-protected page, we need to
+ * emulate this operation, return 1 to indicate this case.
  */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         struct guest_walker *gw,
                         int user_fault, int write_fault, int hlevel,
-                        int *emulate, pfn_t pfn, bool map_writable,
-                        bool prefault)
+                        pfn_t pfn, bool map_writable, bool prefault)
 {
-       unsigned access = gw->pt_access;
        struct kvm_mmu_page *sp = NULL;
-       int top_level;
-       unsigned direct_access;
        struct kvm_shadow_walk_iterator it;
+       unsigned direct_access, access = gw->pt_access;
+       int top_level, emulate = 0;
 
        if (!is_present_gpte(gw->ptes[gw->level - 1]))
-               return NULL;
+               return 0;
 
        direct_access = gw->pte_access;
 
@@ -505,17 +478,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
        clear_sp_write_flooding_count(it.sptep);
        mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
-                    user_fault, write_fault, emulate, it.level,
+                    user_fault, write_fault, &emulate, it.level,
                     gw->gfn, pfn, prefault, map_writable);
        FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
-       return it.sptep;
+       return emulate;
 
 out_gpte_changed:
        if (sp)
                kvm_mmu_put_page(sp, it.sptep);
        kvm_release_pfn_clean(pfn);
-       return NULL;
+       return 0;
 }
 
 /*
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
        int write_fault = error_code & PFERR_WRITE_MASK;
        int user_fault = error_code & PFERR_USER_MASK;
        struct guest_walker walker;
-       u64 *sptep;
-       int emulate = 0;
        int r;
        pfn_t pfn;
        int level = PT_PAGE_TABLE_LEVEL;
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
                return r;
 
        spin_lock(&vcpu->kvm->mmu_lock);
-       if (mmu_notifier_retry(vcpu, mmu_seq))
+       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                goto out_unlock;
 
        kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
        kvm_mmu_free_some_pages(vcpu);
        if (!force_pt_level)
                transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
-       sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-                            level, &emulate, pfn, map_writable, prefault);
-       (void)sptep;
-       pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
-                sptep, *sptep, emulate);
-
+       r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+                        level, pfn, map_writable, prefault);
        ++vcpu->stat.pf_fixed;
        kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
-       return emulate;
+       return r;
 
 out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                                      sizeof(pt_element_t)))
                        return -EINVAL;
 
-               if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+               if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
                        vcpu->kvm->tlbs_dirty++;
                        continue;
                }
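
Net effect of the changes shown above: the guest-PTE validity check, pfn lookup and shadow-PTE installation that FNAME(update_pte) and FNAME(pte_prefetch) used to duplicate are consolidated into a single FNAME(prefetch_gpte) helper that returns false when the entry cannot be mapped, and FNAME(fetch)/FNAME(page_fault) now report the "emulate" decision through the return value rather than an int *emulate out-parameter. The standalone C sketch below only illustrates that refactoring pattern; all names (demo_*), the struct and the simplified checks are invented for this example and are not KVM code.

/*
 * Illustrative sketch only, not kernel code: a single helper validates and
 * maps an entry (returning false so callers stop), and the fault path
 * returns its emulate decision instead of writing an out-parameter.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_gpte {
        bool present;
        bool accessed;
        unsigned long gfn;
};

/* Roughly analogous to FNAME(prefetch_gpte): validate, then install. */
static bool demo_prefetch_gpte(const struct demo_gpte *gpte)
{
        if (!gpte->present || !gpte->accessed)
                return false;           /* caller skips or stops prefetching */
        printf("map gfn %lu\n", gpte->gfn);
        return true;
}

/* Roughly analogous to FNAME(fetch) changing from u64 * to int: the
 * emulate flag is the return value, not an out-parameter. */
static int demo_fetch(const struct demo_gpte *gpte, bool write_protected,
                      bool write_fault)
{
        int emulate = 0;

        if (!demo_prefetch_gpte(gpte))
                return 0;
        if (write_fault && write_protected)
                emulate = 1;            /* write hit a write-protected page */
        return emulate;
}

int main(void)
{
        struct demo_gpte g = { .present = true, .accessed = true, .gfn = 42 };

        printf("emulate = %d\n", demo_fetch(&g, true, true));
        return 0;
}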