author    Hollis Blanchard <hollisb@us.ibm.com>  2008-12-02 16:51:53 -0500
committer Avi Kivity <avi@redhat.com>            2008-12-31 09:55:07 -0500
commit    891686188f69d330f7eeeec8e6642ccfb7453106
tree      39349d7383b3cf1fe0fda983d8c4f0d86a6dc2bc /arch/powerpc/kvm
parent    4a643be8c9b8d3c1ae8f5ccd377daaa85bd57e0c
KVM: ppc: support large host pages
KVM on 440 has always been able to handle large guest mappings with 4K host
pages -- we must, since the guest kernel uses 256MB mappings.
This patch makes KVM work when the host has large pages too (tested with 64K).
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
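
[Editor's note: the interesting case in this patch is a guest mapping smaller than the host page size. The shadow TLB entry is clamped to 4K, and the host physical address must be offset to the correct 4K sub-page of the large host page. Below is a minimal user-space sketch of that arithmetic, assuming a 64K host page size (the configuration the commit message says was tested); the addresses and the printf driver are hypothetical, not part of the patch.]

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT   16                    /* assumption: 64K host pages */
#define PAGE_SIZE    (1ULL << PAGE_SHIFT)
#define PAGE_MASK    (~(PAGE_SIZE - 1))
#define PAGE_SIZE_4K (1ULL << 12)
#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))

int main(void)
{
	uint64_t gpaddr = 0x12345000;      /* hypothetical guest physical address */
	uint64_t hpaddr = 0xabcd0000;      /* hypothetical 64K-aligned host page */
	uint32_t max_bytes = PAGE_SIZE_4K; /* guest asked for a 4K page */

	if (max_bytes >= PAGE_SIZE) {
		/* Guest page covers at least a host page: use the host page
		 * size directly, as the patch's "native" mapping branch does. */
		printf("native mapping at %#llx\n", (unsigned long long)hpaddr);
	} else {
		/* Guest page is smaller: clamp to 4K and select the right 4K
		 * sub-page. Here PAGE_MASK ^ PAGE_MASK_4K == 0xf000, i.e. guest
		 * physical bits 12..15 choose the slice of the 64K host page. */
		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
		printf("4K sub-page mapping at %#llx\n",
		       (unsigned long long)hpaddr);
	}
	return 0;
}

[With these values the sketch prints "4K sub-page mapping at 0xabcd5000": guest physical bits 12..15 (0x5) select the fifth 4K slice of the 64K host page.]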
Diffstat (limited to 'arch/powerpc/kvm')
 -rw-r--r--  arch/powerpc/kvm/44x_tlb.c | 71
 -rw-r--r--  arch/powerpc/kvm/booke.c   | 12
 2 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ee2461860bcf..d49dc66ab3c3 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -28,6 +28,13 @@
 
 #include "44x_tlb.h"
 
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
 #define PPC44x_TLB_UATTR_MASK \
 	(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
@@ -179,15 +186,26 @@ void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
 	vcpu_44x->shadow_tlb_mod[i] = 1;
 }
 
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
-                    u32 flags)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+                    u32 flags, u32 max_bytes)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct page *new_page;
 	struct kvmppc_44x_tlbe *stlbe;
 	hpa_t hpaddr;
+	gfn_t gfn;
 	unsigned int victim;
 
 	/* Future optimization: don't overwrite the TLB entry containing the
@@ -198,6 +216,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	stlbe = &vcpu_44x->shadow_tlb[victim];
 
 	/* Get reference to new page. */
+	gfn = gpaddr >> PAGE_SHIFT;
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -220,10 +239,25 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	stlbe->tid = !(asid & 0xff);
 
 	/* Force TS=1 for all guest mappings. */
-	/* For now we hardcode 4KB mappings, but it will be important to
-	 * use host large pages in the future. */
-	stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
-	               | PPC44x_TLB_4K;
+	stlbe->word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+	if (max_bytes >= PAGE_SIZE) {
+		/* Guest mapping is larger than or equal to host page size. We can use
+		 * a "native" host mapping. */
+		stlbe->word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+	} else {
+		/* Guest mapping is smaller than host page size. We must restrict the
+		 * size of the mapping to be at most the smaller of the two, but for
+		 * simplicity we fall back to a 4K mapping (this is probably what the
+		 * guest is using anyways). */
+		stlbe->word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+		/* 'hpaddr' is a host page, which is larger than the mapping we're
+		 * inserting here. To compensate, we must add the in-page offset to the
+		 * sub-page. */
+		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+	}
+
 	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
 	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
 	                                            vcpu->arch.msr & MSR_PR);
@@ -322,10 +356,8 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
 int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	u64 eaddr;
-	u64 raddr;
+	gva_t eaddr;
 	u64 asid;
-	u32 flags;
 	struct kvmppc_44x_tlbe *tlbe;
 	unsigned int index;
 
@@ -364,15 +396,22 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	}
 
 	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		gpa_t gpaddr;
+		u32 flags;
+		u32 bytes;
+
 		eaddr = get_tlb_eaddr(tlbe);
-		raddr = get_tlb_raddr(tlbe);
+		gpaddr = get_tlb_raddr(tlbe);
+
+		/* Use the advertised page size to mask effective and real addrs. */
+		bytes = get_tlb_bytes(tlbe);
+		eaddr &= ~(bytes - 1);
+		gpaddr &= ~(bytes - 1);
+
 		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
 		flags = tlbe->word2 & 0xffff;
 
-		/* Create a 4KB mapping on the host. If the guest wanted a
-		 * large page, only the first 4KB is mapped here and the rest
-		 * are mapped on the fly. */
-		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes);
 	}
 
 	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
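
[Editor's note: the tlbwe emulation above rounds both the effective and the real address down to the advertised page-size boundary with `addr &= ~(bytes - 1)`, which is valid because 440 TLB page sizes are all powers of two. A minimal sketch of that rounding, using hypothetical addresses and the 256MB page size the commit message says the guest kernel uses:]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t eaddr  = 0xc0001234;   /* hypothetical effective address */
	uint64_t gpaddr = 0x40001234;   /* hypothetical real address */
	uint32_t bytes  = 256 << 20;    /* a 440 256MB TLB page */

	/* Round both down to the page boundary, as the emulation does;
	 * bytes - 1 == 0x0fffffff, so the low 28 bits are cleared. */
	eaddr  &= ~(uint64_t)(bytes - 1);
	gpaddr &= ~(uint64_t)(bytes - 1);

	printf("eaddr %#llx, gpaddr %#llx\n",   /* 0xc0000000, 0x40000000 */
	       (unsigned long long)eaddr, (unsigned long long)gpaddr);
	return 0;
}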
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ec59a6768ec3..924c7b4b1107 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -308,8 +308,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 *  b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
+			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe));
 			vcpu->stat.dtlb_virt_miss_exits++;
 			r = RESUME_GUEST;
 		} else {
@@ -325,6 +325,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOKE_INTERRUPT_ITLB_MISS: {
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
+		gpa_t gpaddr;
 		gfn_t gfn;
 
 		r = RESUME_GUEST;
@@ -340,7 +341,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		vcpu->stat.itlb_virt_miss_exits++;
 
-		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
 
 		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 			/* The guest TLB had a mapping, but the shadow TLB
@@ -349,8 +351,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 *  b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe));
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
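
[Editor's note: one line the patch leaves unchanged but relies on is the shadow-TLB packing `stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);` in 44x_tlb.c. As I read the 440 TLB format, the chip has a 36-bit physical address space: the mask 0xfffffc00 keeps the real page number from the low 32 bits, and `(hpaddr >> 32) & 0xf` supplies the extended real page number (ERPN) nibble for physical bits 32..35. A standalone sketch of the packing, with a hypothetical address:]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical 36-bit host physical address (bits 32..35 = 0x1). */
	uint64_t hpaddr = 0x123456789ULL;

	/* Same packing as the shadow-TLB code: RPN from the low word,
	 * ERPN (physical bits 32..35) in the bottom nibble. */
	uint32_t word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);

	printf("word1 = %#x\n", word1);   /* prints 0x23456401 */
	return 0;
}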