aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Gibson <dwg@au1.ibm.com>2011-06-28 20:22:41 -0400
committerAvi Kivity <avi@redhat.com>2011-07-12 06:16:56 -0400
commit54738c097163c3f01e67ccc85462b78d4d4f495f (patch)
treecba8d389d50251856cbe967c16ba2193a30d6d12
parenta8606e20e41a8149456bafdf76ad29d47672027c (diff)
KVM: PPC: Accelerate H_PUT_TCE by implementing it in real mode
This improves I/O performance for guests using the PAPR paravirtualization interface by making the H_PUT_TCE hcall faster, by implementing it in real mode. H_PUT_TCE is used for updating virtual IOMMU tables, and is used both for virtual I/O and for real I/O in the PAPR interface. Since this moves the IOMMU tables into the kernel, we define a new KVM_CREATE_SPAPR_TCE ioctl to allow qemu to create the tables. The ioctl returns a file descriptor which can be used to mmap the newly created table. The qemu driver models use them in the same way as userspace managed tables, but they can be updated directly by the guest with a real-mode H_PUT_TCE implementation, reducing the number of host/guest context switches during guest IO. There are certain circumstances where it is useful for userland qemu to write to the TCE table even if the kernel H_PUT_TCE path is used most of the time. Specifically, allowing this will avoid awkwardness when we need to reset the table. More importantly, we will in the future need to write the table in order to restore its state after a checkpoint resume or migration. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
-rw-r--r--Documentation/virtual/kvm/api.txt35
-rw-r--r--arch/powerpc/include/asm/kvm.h9
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h9
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/kvm/Makefile3
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c73
-rw-r--r--arch/powerpc/kvm/book3s_hv.c116
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2
-rw-r--r--arch/powerpc/kvm/powerpc.c18
-rw-r--r--include/linux/kvm.h2
11 files changed, 268 insertions, 3 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e8875fef3eb..a1d344d5ff4 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1350,6 +1350,41 @@ The following flags are defined:
1350If datamatch flag is set, the event will be signaled only if the written value 1350If datamatch flag is set, the event will be signaled only if the written value
1351to the registered address is equal to datamatch in struct kvm_ioeventfd. 1351to the registered address is equal to datamatch in struct kvm_ioeventfd.
1352 1352
13534.62 KVM_CREATE_SPAPR_TCE
1354
1355Capability: KVM_CAP_SPAPR_TCE
1356Architectures: powerpc
1357Type: vm ioctl
1358Parameters: struct kvm_create_spapr_tce (in)
1359Returns: file descriptor for manipulating the created TCE table
1360
1361This creates a virtual TCE (translation control entry) table, which
1362is an IOMMU for PAPR-style virtual I/O. It is used to translate
1363logical addresses used in virtual I/O into guest physical addresses,
1364and provides a scatter/gather capability for PAPR virtual I/O.
1365
1366/* for KVM_CAP_SPAPR_TCE */
1367struct kvm_create_spapr_tce {
1368 __u64 liobn;
1369 __u32 window_size;
1370};
1371
1372The liobn field gives the logical IO bus number for which to create a
1373TCE table. The window_size field specifies the size of the DMA window
1374which this TCE table will translate - the table will contain one 64-bit
1375TCE entry for every 4 KiB of the DMA window.
1376
1377When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
1378table has been created using this ioctl(), the kernel will handle it
1379in real mode, updating the TCE table. H_PUT_TCE calls for other
1380liobns will cause a vm exit and must be handled by userspace.
1381
1382The return value is a file descriptor which can be passed to mmap(2)
1383to map the created TCE table into userspace. This lets userspace read
1384the entries written by kernel-handled H_PUT_TCE calls, and also lets
1385userspace update the TCE table directly, which is useful in some
1386circumstances.
1387
13535. The kvm_run structure 13885. The kvm_run structure
1354 1389
1355Application code obtains a pointer to the kvm_run structure by 1390Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index d2ca5ed3877..c3ec990daf4 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -22,6 +22,9 @@
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24 24
25/* Select powerpc specific features in <linux/kvm.h> */
26#define __KVM_HAVE_SPAPR_TCE
27
25struct kvm_regs { 28struct kvm_regs {
26 __u64 pc; 29 __u64 pc;
27 __u64 cr; 30 __u64 cr;
@@ -272,4 +275,10 @@ struct kvm_guest_debug_arch {
272#define KVM_INTERRUPT_UNSET -2U 275#define KVM_INTERRUPT_UNSET -2U
273#define KVM_INTERRUPT_SET_LEVEL -3U 276#define KVM_INTERRUPT_SET_LEVEL -3U
274 277
278/* for KVM_CAP_SPAPR_TCE */
279struct kvm_create_spapr_tce {
280 __u64 liobn;
281 __u32 window_size;
282};
283
275#endif /* __LINUX_KVM_POWERPC_H */ 284#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 5f73388ea0a..e43fe42b987 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -27,4 +27,6 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
27} 27}
28#endif 28#endif
29 29
30#define SPAPR_TCE_SHIFT 12
31
30#endif /* __ASM_KVM_BOOK3S_64_H__ */ 32#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6ebf1721680..5616e39a7fa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -144,6 +144,14 @@ struct kvmppc_pginfo {
144 atomic_t refcnt; 144 atomic_t refcnt;
145}; 145};
146 146
147struct kvmppc_spapr_tce_table {
148 struct list_head list;
149 struct kvm *kvm;
150 u64 liobn;
151 u32 window_size;
152 struct page *pages[0];
153};
154
147struct kvm_arch { 155struct kvm_arch {
148#ifdef CONFIG_KVM_BOOK3S_64_HV 156#ifdef CONFIG_KVM_BOOK3S_64_HV
149 unsigned long hpt_virt; 157 unsigned long hpt_virt;
@@ -157,6 +165,7 @@ struct kvm_arch {
157 unsigned long sdr1; 165 unsigned long sdr1;
158 unsigned long host_sdr1; 166 unsigned long host_sdr1;
159 int tlbie_lock; 167 int tlbie_lock;
168 struct list_head spapr_tce_tables;
160 unsigned short last_vcpu[NR_CPUS]; 169 unsigned short last_vcpu[NR_CPUS];
161#endif /* CONFIG_KVM_BOOK3S_64_HV */ 170#endif /* CONFIG_KVM_BOOK3S_64_HV */
162}; 171};
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2afe92e6f62..99f6fcf4cf8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -119,6 +119,8 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
119extern void kvmppc_map_vrma(struct kvm *kvm, 119extern void kvmppc_map_vrma(struct kvm *kvm,
120 struct kvm_userspace_memory_region *mem); 120 struct kvm_userspace_memory_region *mem);
121extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); 121extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
122extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
123 struct kvm_create_spapr_tce *args);
122extern int kvmppc_core_init_vm(struct kvm *kvm); 124extern int kvmppc_core_init_vm(struct kvm *kvm);
123extern void kvmppc_core_destroy_vm(struct kvm *kvm); 125extern void kvmppc_core_destroy_vm(struct kvm *kvm);
124extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, 126extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2ecffc0dc1b..1de3d54901d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -55,7 +55,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
55 book3s_hv_interrupts.o \ 55 book3s_hv_interrupts.o \
56 book3s_64_mmu_hv.o 56 book3s_64_mmu_hv.o
57kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 57kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
58 book3s_hv_rm_mmu.o 58 book3s_hv_rm_mmu.o \
59 book3s_64_vio_hv.o
59 60
60kvm-book3s_64-module-objs := \ 61kvm-book3s_64-module-objs := \
61 ../../../virt/kvm/kvm_main.o \ 62 ../../../virt/kvm/kvm_main.o \
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
new file mode 100644
index 00000000000..ea0f8c537c2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -0,0 +1,73 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
17 */
18
19#include <linux/types.h>
20#include <linux/string.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/highmem.h>
24#include <linux/gfp.h>
25#include <linux/slab.h>
26#include <linux/hugetlb.h>
27#include <linux/list.h>
28
29#include <asm/tlbflush.h>
30#include <asm/kvm_ppc.h>
31#include <asm/kvm_book3s.h>
32#include <asm/mmu-hash64.h>
33#include <asm/hvcall.h>
34#include <asm/synch.h>
35#include <asm/ppc-opcode.h>
36#include <asm/kvm_host.h>
37#include <asm/udbg.h>
38
39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
40
41long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
42 unsigned long ioba, unsigned long tce)
43{
44 struct kvm *kvm = vcpu->kvm;
45 struct kvmppc_spapr_tce_table *stt;
46
47 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
48 /* liobn, ioba, tce); */
49
50 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
51 if (stt->liobn == liobn) {
52 unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
53 struct page *page;
54 u64 *tbl;
55
56 /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
57 /* liobn, stt, stt->window_size); */
58 if (ioba >= stt->window_size)
59 return H_PARAMETER;
60
61 page = stt->pages[idx / TCES_PER_PAGE];
62 tbl = (u64 *)page_address(page);
63
64 /* FIXME: Need to validate the TCE itself */
65 /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
66 tbl[idx % TCES_PER_PAGE] = tce;
67 return H_SUCCESS;
68 }
69 }
70
71 /* Didn't find the liobn, punt it to userspace */
72 return H_TOO_HARD;
73}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index af862c30b70..6fe469eabce 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -538,6 +538,116 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
538 return r; 538 return r;
539} 539}
540 540
541static long kvmppc_stt_npages(unsigned long window_size)
542{
543 return ALIGN((window_size >> SPAPR_TCE_SHIFT)
544 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
545}
546
547static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
548{
549 struct kvm *kvm = stt->kvm;
550 int i;
551
552 mutex_lock(&kvm->lock);
553 list_del(&stt->list);
554 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
555 __free_page(stt->pages[i]);
556 kfree(stt);
557 mutex_unlock(&kvm->lock);
558
559 kvm_put_kvm(kvm);
560}
561
562static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
563{
564 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
565 struct page *page;
566
567 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
568 return VM_FAULT_SIGBUS;
569
570 page = stt->pages[vmf->pgoff];
571 get_page(page);
572 vmf->page = page;
573 return 0;
574}
575
576static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
577 .fault = kvm_spapr_tce_fault,
578};
579
580static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
581{
582 vma->vm_ops = &kvm_spapr_tce_vm_ops;
583 return 0;
584}
585
586static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
587{
588 struct kvmppc_spapr_tce_table *stt = filp->private_data;
589
590 release_spapr_tce_table(stt);
591 return 0;
592}
593
594static struct file_operations kvm_spapr_tce_fops = {
595 .mmap = kvm_spapr_tce_mmap,
596 .release = kvm_spapr_tce_release,
597};
598
599long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
600 struct kvm_create_spapr_tce *args)
601{
602 struct kvmppc_spapr_tce_table *stt = NULL;
603 long npages;
604 int ret = -ENOMEM;
605 int i;
606
607 /* Check this LIOBN hasn't been previously allocated */
608 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
609 if (stt->liobn == args->liobn)
610 return -EBUSY;
611 }
612
613 npages = kvmppc_stt_npages(args->window_size);
614
615 stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
616 GFP_KERNEL);
617 if (!stt)
618 goto fail;
619
620 stt->liobn = args->liobn;
621 stt->window_size = args->window_size;
622 stt->kvm = kvm;
623
624 for (i = 0; i < npages; i++) {
625 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
626 if (!stt->pages[i])
627 goto fail;
628 }
629
630 kvm_get_kvm(kvm);
631
632 mutex_lock(&kvm->lock);
633 list_add(&stt->list, &kvm->arch.spapr_tce_tables);
634
635 mutex_unlock(&kvm->lock);
636
637 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
638 stt, O_RDWR);
639
640fail:
641 if (stt) {
642 for (i = 0; i < npages; i++)
643 if (stt->pages[i])
644 __free_page(stt->pages[i]);
645
646 kfree(stt);
647 }
648 return ret;
649}
650
541int kvmppc_core_prepare_memory_region(struct kvm *kvm, 651int kvmppc_core_prepare_memory_region(struct kvm *kvm,
542 struct kvm_userspace_memory_region *mem) 652 struct kvm_userspace_memory_region *mem)
543{ 653{
@@ -559,13 +669,17 @@ int kvmppc_core_init_vm(struct kvm *kvm)
559 669
560 /* Allocate hashed page table */ 670 /* Allocate hashed page table */
561 r = kvmppc_alloc_hpt(kvm); 671 r = kvmppc_alloc_hpt(kvm);
672 if (r)
673 return r;
562 674
563 return r; 675 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
676 return 0;
564} 677}
565 678
566void kvmppc_core_destroy_vm(struct kvm *kvm) 679void kvmppc_core_destroy_vm(struct kvm *kvm)
567{ 680{
568 kvmppc_free_hpt(kvm); 681 kvmppc_free_hpt(kvm);
682 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
569} 683}
570 684
571/* These are stubs for now */ 685/* These are stubs for now */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 319ff63b1f3..e6adaadcdff 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -754,7 +754,7 @@ hcall_real_table:
754 .long 0 /* 0x14 - H_CLEAR_REF */ 754 .long 0 /* 0x14 - H_CLEAR_REF */
755 .long .kvmppc_h_protect - hcall_real_table 755 .long .kvmppc_h_protect - hcall_real_table
756 .long 0 /* 0x1c - H_GET_TCE */ 756 .long 0 /* 0x1c - H_GET_TCE */
757 .long 0 /* 0x20 - H_SET_TCE */ 757 .long .kvmppc_h_put_tce - hcall_real_table
758 .long 0 /* 0x24 - H_SET_SPRG0 */ 758 .long 0 /* 0x24 - H_SET_SPRG0 */
759 .long .kvmppc_h_set_dabr - hcall_real_table 759 .long .kvmppc_h_set_dabr - hcall_real_table
760 .long 0 /* 0x2c */ 760 .long 0 /* 0x2c */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fc9ee499b6..c78ceb9d560 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -203,6 +203,11 @@ int kvm_dev_ioctl_check_extension(long ext)
203 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 203 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
204 break; 204 break;
205#endif 205#endif
206#ifdef CONFIG_KVM_BOOK3S_64_HV
207 case KVM_CAP_SPAPR_TCE:
208 r = 1;
209 break;
210#endif
206 default: 211 default:
207 r = 0; 212 r = 0;
208 break; 213 break;
@@ -653,6 +658,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
653 658
654 break; 659 break;
655 } 660 }
661#ifdef CONFIG_KVM_BOOK3S_64_HV
662 case KVM_CREATE_SPAPR_TCE: {
663 struct kvm_create_spapr_tce create_tce;
664 struct kvm *kvm = filp->private_data;
665
666 r = -EFAULT;
667 if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
668 goto out;
669 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
670 goto out;
671 }
672#endif /* CONFIG_KVM_BOOK3S_64_HV */
673
656 default: 674 default:
657 r = -ENOTTY; 675 r = -ENOTTY;
658 } 676 }
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a156294fc22..61f56502732 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -550,6 +550,7 @@ struct kvm_ppc_pvinfo {
550#define KVM_CAP_TSC_CONTROL 60 550#define KVM_CAP_TSC_CONTROL 60
551#define KVM_CAP_GET_TSC_KHZ 61 551#define KVM_CAP_GET_TSC_KHZ 61
552#define KVM_CAP_PPC_BOOKE_SREGS 62 552#define KVM_CAP_PPC_BOOKE_SREGS 62
553#define KVM_CAP_SPAPR_TCE 63
553 554
554#ifdef KVM_CAP_IRQ_ROUTING 555#ifdef KVM_CAP_IRQ_ROUTING
555 556
@@ -752,6 +753,7 @@ struct kvm_clock_data {
752/* Available with KVM_CAP_XCRS */ 753/* Available with KVM_CAP_XCRS */
753#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) 754#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
754#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) 755#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
756#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
755 757
756#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 758#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
757 759