author		Paul Mackerras <paulus@samba.org>	2011-06-28 20:25:44 -0400
committer	Avi Kivity <avi@redhat.com>		2011-07-12 06:16:57 -0400
commit		aa04b4cc5be64b4fb9ef4e0fdf2418e2f4737fb2 (patch)
tree		97a3ff14e43424e28a27e0f3be088649818c1b76 /arch/powerpc/kvm
parent		371fefd6f2dc46668e00871930dde613b88d4bde (diff)
KVM: PPC: Allocate RMAs (Real Mode Areas) at boot for use by guests
This adds infrastructure which will be needed to allow book3s_hv KVM to
run on older POWER processors, including PPC970, which don't support
the Virtual Real Mode Area (VRMA) facility, but only the Real Mode
Offset (RMO) facility.  These processors require a physically
contiguous, aligned area of memory for each guest.  When the guest does
an access in real mode (MMU off), the address is compared against a
limit value, and if it is lower, the address is ORed with an offset
value (from the Real Mode Offset Register (RMOR)) and the result
becomes the real address for the access.  The size of the RMA has to be
one of a set of supported values, which usually includes 64MB, 128MB,
256MB and some larger powers of 2.

Since we are unlikely to be able to allocate 64MB or more of physically
contiguous memory after the kernel has been running for a while, we
allocate a pool of RMAs at boot time using the bootmem allocator.  The
size and number of the RMAs can be set using the kvm_rma_size=xx and
kvm_rma_count=xx kernel command line options.

KVM exports a new capability, KVM_CAP_PPC_RMA, to signal the
availability of the pool of preallocated RMAs.  The capability value is
1 if the processor can use an RMA but doesn't require one (because it
supports the VRMA facility), or 2 if the processor requires an RMA for
each guest.

This adds a new ioctl, KVM_ALLOCATE_RMA, which allocates an RMA from
the pool and returns a file descriptor which can be used to map the
RMA.  It also returns the size of the RMA in the argument structure.

Having an RMA means we will get multiple KVM_SET_USER_MEMORY_REGION
ioctl calls from userspace.  To cope with this, we now preallocate the
kvm->arch.ram_pginfo array when the VM is created with a size
sufficient for up to 64GB of guest memory.  Subsequently we will get
rid of this array and use memory associated with each memslot instead.

This moves most of the code that translates the user addresses into
host pfns (page frame numbers) out of kvmppc_prepare_vrma up one level
to kvmppc_core_prepare_memory_region.  Also, instead of having to look
up the VMA for each page in order to check the page size, we now check
that the pages we get are compound pages of 16MB.  However, if we are
adding memory that is mapped to an RMA, we don't bother with calling
get_user_pages_fast and instead just offset from the base pfn for the
RMA.

Typically the RMA gets added after vcpus are created, which makes it
inconvenient to have the LPCR (logical partition control register)
value in the vcpu->arch struct, since the LPCR controls whether the
processor uses RMA or VRMA for the guest.  This moves the LPCR value
into the kvm->arch struct and arranges for the MER (mediated external
request) bit, which is the only bit that varies between vcpus, to be
set in assembly code when going into the guest if there is a pending
external interrupt request.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
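As a usage illustration, a userspace VMM would drive the new interface
roughly as follows: probe KVM_CAP_PPC_RMA, take an RMA from the
boot-time pool with KVM_ALLOCATE_RMA, mmap the returned file
descriptor, and register the mapping at guest physical address 0.  The
sketch below is not part of the patch; the helper name, the slot
number, and the omission of error cleanup are illustrative assumptions.

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Hypothetical helper: returns the host address of the RMA mapping,
 * or NULL on failure.  kvm_fd is the /dev/kvm fd, vm_fd the VM fd. */
static void *setup_rma(int kvm_fd, int vm_fd)
{
	struct kvm_allocate_rma rma;
	struct kvm_userspace_memory_region reg;
	void *addr;
	int rma_fd, cap;

	/* 1: RMA usable but optional (VRMA capable); 2: RMA required */
	cap = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA);
	if (cap <= 0)
		return NULL;

	/* Take one preallocated RMA; the returned fd is used to map it,
	 * and rma.rma_size is filled in by the kernel. */
	rma_fd = ioctl(vm_fd, KVM_ALLOCATE_RMA, &rma);
	if (rma_fd < 0)
		return NULL;

	/* Map the whole RMA.  The kernel checks that userspace_addr
	 * below matches the start of this mapping. */
	addr = mmap(NULL, rma.rma_size, PROT_READ | PROT_WRITE,
		    MAP_SHARED, rma_fd, 0);
	if (addr == MAP_FAILED)
		return NULL;

	/* Register it at guest physical address 0 (slot 0 assumed).
	 * The supported RMA sizes are all multiples of 16MB, as the
	 * new prepare_memory_region code requires. */
	reg.slot = 0;
	reg.flags = 0;
	reg.guest_phys_addr = 0;
	reg.memory_size = rma.rma_size;
	reg.userspace_addr = (uintptr_t)addr;
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg) < 0)
		return NULL;

	printf("RMA of %llu bytes mapped at %p\n",
	       (unsigned long long)rma.rma_size, addr);
	return addr;
}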
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--	arch/powerpc/kvm/Makefile		|   3
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c	|  97
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c		| 259
-rw-r--r--	arch/powerpc/kvm/book3s_hv_builtin.c	| 152
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S	|  19
-rw-r--r--	arch/powerpc/kvm/powerpc.c		|  13
6 files changed, 434 insertions, 109 deletions
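For example, a pool of four 256MB RMAs could be reserved at boot with
the following kernel command line fragment (example values;
kvm_rma_size is parsed with memparse, so suffixes such as M and G work,
and kvm_rma_count is a plain integer):

	kvm_rma_size=256M kvm_rma_count=4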
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 1de3d54901d..08428e2c188 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -56,7 +56,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_64_mmu_hv.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv_rm_mmu.o \
-	book3s_64_vio_hv.o
+	book3s_64_vio_hv.o \
+	book3s_hv_builtin.o
 
 kvm-book3s_64-module-objs := \
 	../../../virt/kvm/kvm_main.o \
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4a4fbec61a1..96ba96a16ab 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -79,103 +79,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
 
 void kvmppc_free_hpt(struct kvm *kvm)
 {
-	unsigned long i;
-	struct kvmppc_pginfo *pginfo;
-
 	clear_bit(kvm->arch.lpid, lpid_inuse);
 	free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
-
-	if (kvm->arch.ram_pginfo) {
-		pginfo = kvm->arch.ram_pginfo;
-		kvm->arch.ram_pginfo = NULL;
-		for (i = 0; i < kvm->arch.ram_npages; ++i)
-			put_page(pfn_to_page(pginfo[i].pfn));
-		kfree(pginfo);
-	}
-}
-
-static unsigned long user_page_size(unsigned long addr)
-{
-	struct vm_area_struct *vma;
-	unsigned long size = PAGE_SIZE;
-
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, addr);
-	if (vma)
-		size = vma_kernel_pagesize(vma);
-	up_read(&current->mm->mmap_sem);
-	return size;
-}
-
-static pfn_t hva_to_pfn(unsigned long addr)
-{
-	struct page *page[1];
-	int npages;
-
-	might_sleep();
-
-	npages = get_user_pages_fast(addr, 1, 1, page);
-
-	if (unlikely(npages != 1))
-		return 0;
-
-	return page_to_pfn(page[0]);
-}
-
-long kvmppc_prepare_vrma(struct kvm *kvm,
-			 struct kvm_userspace_memory_region *mem)
-{
-	unsigned long psize, porder;
-	unsigned long i, npages;
-	struct kvmppc_pginfo *pginfo;
-	pfn_t pfn;
-	unsigned long hva;
-
-	/* First see what page size we have */
-	psize = user_page_size(mem->userspace_addr);
-	/* For now, only allow 16MB pages */
-	if (psize != 1ul << VRMA_PAGE_ORDER || (mem->memory_size & (psize - 1))) {
-		pr_err("bad psize=%lx memory_size=%llx @ %llx\n",
-		       psize, mem->memory_size, mem->userspace_addr);
-		return -EINVAL;
-	}
-	porder = __ilog2(psize);
-
-	npages = mem->memory_size >> porder;
-	pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo), GFP_KERNEL);
-	if (!pginfo) {
-		pr_err("kvmppc_prepare_vrma: couldn't alloc %lu bytes\n",
-		       npages * sizeof(struct kvmppc_pginfo));
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < npages; ++i) {
-		hva = mem->userspace_addr + (i << porder);
-		if (user_page_size(hva) != psize)
-			goto err;
-		pfn = hva_to_pfn(hva);
-		if (pfn == 0) {
-			pr_err("oops, no pfn for hva %lx\n", hva);
-			goto err;
-		}
-		if (pfn & ((1ul << (porder - PAGE_SHIFT)) - 1)) {
-			pr_err("oops, unaligned pfn %llx\n", pfn);
-			put_page(pfn_to_page(pfn));
-			goto err;
-		}
-		pginfo[i].pfn = pfn;
-	}
-
-	kvm->arch.ram_npages = npages;
-	kvm->arch.ram_psize = psize;
-	kvm->arch.ram_porder = porder;
-	kvm->arch.ram_pginfo = pginfo;
-
-	return 0;
-
- err:
-	kfree(pginfo);
-	return -EINVAL;
 }
 
 void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
@@ -199,6 +104,8 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 
 	for (i = 0; i < npages; ++i) {
 		pfn = pginfo[i].pfn;
+		if (!pfn)
+			break;
 		/* can't use hpt_hash since va > 64 bits */
 		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
 		/*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 36b6d98f119..04da135cae6 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -27,6 +27,8 @@
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
 #include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/page-flags.h>
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -40,11 +42,22 @@
 #include <asm/lppaca.h>
 #include <asm/processor.h>
 #include <asm/cputhreads.h>
+#include <asm/page.h>
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
+/*
+ * For now, limit memory to 64GB and require it to be large pages.
+ * This value is chosen because it makes the ram_pginfo array be
+ * 64kB in size, which is about as large as we want to be trying
+ * to allocate with kmalloc.
+ */
+#define MAX_MEM_ORDER		36
+
+#define LARGE_PAGE_ORDER	24	/* 16MB pages */
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -129,7 +142,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 		pr_err("  ESID = %.16llx VSID = %.16llx\n",
 		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
 	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
-	       vcpu->arch.lpcr, vcpu->kvm->arch.sdr1,
+	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
 	       vcpu->arch.last_inst);
 }
 
@@ -441,7 +454,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err = -EINVAL;
 	int core;
 	struct kvmppc_vcore *vcore;
-	unsigned long lpcr;
 
 	core = id / threads_per_core;
 	if (core >= KVM_MAX_VCORES)
@@ -464,10 +476,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	vcpu->arch.pvr = mfspr(SPRN_PVR);
 	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
 
-	lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES);
-	lpcr |= LPCR_VPM0 | LPCR_VRMA_L | (4UL << LPCR_DPFD_SH) | LPCR_HDICE;
-	vcpu->arch.lpcr = lpcr;
-
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	/*
@@ -910,24 +918,216 @@ fail:
 	return ret;
 }
 
+/* Work out RMLS (real mode limit selector) field value for a given RMA size.
+   Assumes POWER7. */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+	switch (rma_size) {
+	case 32ul << 20:	/* 32 MB */
+		return 8;
+	case 64ul << 20:	/* 64 MB */
+		return 3;
+	case 128ul << 20:	/* 128 MB */
+		return 7;
+	case 256ul << 20:	/* 256 MB */
+		return 4;
+	case 1ul << 30:		/* 1 GB */
+		return 2;
+	case 16ul << 30:	/* 16 GB */
+		return 1;
+	case 256ul << 30:	/* 256 GB */
+		return 0;
+	default:
+		return -1;
+	}
+}
+
+static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvmppc_rma_info *ri = vma->vm_file->private_data;
+	struct page *page;
+
+	if (vmf->pgoff >= ri->npages)
+		return VM_FAULT_SIGBUS;
+
+	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
+	get_page(page);
+	vmf->page = page;
+	return 0;
+}
+
+static const struct vm_operations_struct kvm_rma_vm_ops = {
+	.fault = kvm_rma_fault,
+};
+
+static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &kvm_rma_vm_ops;
+	return 0;
+}
+
+static int kvm_rma_release(struct inode *inode, struct file *filp)
+{
+	struct kvmppc_rma_info *ri = filp->private_data;
+
+	kvm_release_rma(ri);
+	return 0;
+}
+
+static struct file_operations kvm_rma_fops = {
+	.mmap		= kvm_rma_mmap,
+	.release	= kvm_rma_release,
+};
+
+long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
+{
+	struct kvmppc_rma_info *ri;
+	long fd;
+
+	ri = kvm_alloc_rma();
+	if (!ri)
+		return -ENOMEM;
+
+	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
+	if (fd < 0)
+		kvm_release_rma(ri);
+
+	ret->rma_size = ri->npages << PAGE_SHIFT;
+	return fd;
+}
+
+static struct page *hva_to_page(unsigned long addr)
+{
+	struct page *page[1];
+	int npages;
+
+	might_sleep();
+
+	npages = get_user_pages_fast(addr, 1, 1, page);
+
+	if (unlikely(npages != 1))
+		return 0;
+
+	return page[0];
+}
+
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				      struct kvm_userspace_memory_region *mem)
 {
-	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
-		return kvmppc_prepare_vrma(kvm, mem);
+	unsigned long psize, porder;
+	unsigned long i, npages, totalpages;
+	unsigned long pg_ix;
+	struct kvmppc_pginfo *pginfo;
+	unsigned long hva;
+	struct kvmppc_rma_info *ri = NULL;
+	struct page *page;
+
+	/* For now, only allow 16MB pages */
+	porder = LARGE_PAGE_ORDER;
+	psize = 1ul << porder;
+	if ((mem->memory_size & (psize - 1)) ||
+	    (mem->guest_phys_addr & (psize - 1))) {
+		pr_err("bad memory_size=%llx @ %llx\n",
+		       mem->memory_size, mem->guest_phys_addr);
+		return -EINVAL;
+	}
+
+	npages = mem->memory_size >> porder;
+	totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
+
+	/* More memory than we have space to track? */
+	if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
+		return -EINVAL;
+
+	/* Do we already have an RMA registered? */
+	if (mem->guest_phys_addr == 0 && kvm->arch.rma)
+		return -EINVAL;
+
+	if (totalpages > kvm->arch.ram_npages)
+		kvm->arch.ram_npages = totalpages;
+
+	/* Is this one of our preallocated RMAs? */
+	if (mem->guest_phys_addr == 0) {
+		struct vm_area_struct *vma;
+
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, mem->userspace_addr);
+		if (vma && vma->vm_file &&
+		    vma->vm_file->f_op == &kvm_rma_fops &&
+		    mem->userspace_addr == vma->vm_start)
+			ri = vma->vm_file->private_data;
+		up_read(&current->mm->mmap_sem);
+	}
+
+	if (ri) {
+		unsigned long rma_size;
+		unsigned long lpcr;
+		long rmls;
+
+		rma_size = ri->npages << PAGE_SHIFT;
+		if (rma_size > mem->memory_size)
+			rma_size = mem->memory_size;
+		rmls = lpcr_rmls(rma_size);
+		if (rmls < 0) {
+			pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
+			return -EINVAL;
+		}
+		atomic_inc(&ri->use_count);
+		kvm->arch.rma = ri;
+		kvm->arch.n_rma_pages = rma_size >> porder;
+		lpcr = kvm->arch.lpcr & ~(LPCR_VPM0 | LPCR_VRMA_L);
+		lpcr |= rmls << LPCR_RMLS_SH;
+		kvm->arch.lpcr = lpcr;
+		kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+		pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
+			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
+	}
+
+	pg_ix = mem->guest_phys_addr >> porder;
+	pginfo = kvm->arch.ram_pginfo + pg_ix;
+	for (i = 0; i < npages; ++i, ++pg_ix) {
+		if (ri && pg_ix < kvm->arch.n_rma_pages) {
+			pginfo[i].pfn = ri->base_pfn +
+				(pg_ix << (porder - PAGE_SHIFT));
+			continue;
+		}
+		hva = mem->userspace_addr + (i << porder);
+		page = hva_to_page(hva);
+		if (!page) {
+			pr_err("oops, no pfn for hva %lx\n", hva);
+			goto err;
+		}
+		/* Check it's a 16MB page */
+		if (!PageHead(page) ||
+		    compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
+			pr_err("page at %lx isn't 16MB (o=%d)\n",
+			       hva, compound_order(page));
+			goto err;
+		}
+		pginfo[i].pfn = page_to_pfn(page);
+	}
+
 	return 0;
+
+ err:
+	return -EINVAL;
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				      struct kvm_userspace_memory_region *mem)
 {
-	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
+	if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
+	    !kvm->arch.rma)
 		kvmppc_map_vrma(kvm, mem);
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
 	long r;
+	unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
+	long err = -ENOMEM;
+	unsigned long lpcr;
 
 	/* Allocate hashed page table */
 	r = kvmppc_alloc_hpt(kvm);
@@ -935,11 +1135,52 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 		return r;
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+
+	kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
+				       GFP_KERNEL);
+	if (!kvm->arch.ram_pginfo) {
+		pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
+		       npages * sizeof(struct kvmppc_pginfo));
+		goto out_free;
+	}
+
+	kvm->arch.ram_npages = 0;
+	kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
+	kvm->arch.ram_porder = LARGE_PAGE_ORDER;
+	kvm->arch.rma = NULL;
+	kvm->arch.n_rma_pages = 0;
+
+	lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES);
+	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+		LPCR_VPM0 | LPCR_VRMA_L;
+	kvm->arch.lpcr = lpcr;
+
+
 	return 0;
+
+ out_free:
+	kvmppc_free_hpt(kvm);
+	return err;
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
+	struct kvmppc_pginfo *pginfo;
+	unsigned long i;
+
+	if (kvm->arch.ram_pginfo) {
+		pginfo = kvm->arch.ram_pginfo;
+		kvm->arch.ram_pginfo = NULL;
+		for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
+			if (pginfo[i].pfn)
+				put_page(pfn_to_page(pginfo[i].pfn));
+		kfree(pginfo);
+	}
+	if (kvm->arch.rma) {
+		kvm_release_rma(kvm->arch.rma);
+		kvm->arch.rma = NULL;
+	}
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
new file mode 100644
index 00000000000..736df3cbbc5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+
+#include <asm/cputable.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/*
+ * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * Each RMA has to be physically contiguous and of a size that the
+ * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
+ * and other larger sizes.  Since we are unlikely to be able to allocate
+ * that much physically contiguous memory after the system is up and
+ * running, we preallocate a set of RMAs in early boot for KVM to use.
+ */
+static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
+static unsigned long kvm_rma_count;
+
+static int __init early_parse_rma_size(char *p)
+{
+	if (!p)
+		return 1;
+
+	kvm_rma_size = memparse(p, &p);
+
+	return 0;
+}
+early_param("kvm_rma_size", early_parse_rma_size);
+
+static int __init early_parse_rma_count(char *p)
+{
+	if (!p)
+		return 1;
+
+	kvm_rma_count = simple_strtoul(p, NULL, 0);
+
+	return 0;
+}
+early_param("kvm_rma_count", early_parse_rma_count);
+
+static struct kvmppc_rma_info *rma_info;
+static LIST_HEAD(free_rmas);
+static DEFINE_SPINLOCK(rma_lock);
+
+/* Work out RMLS (real mode limit selector) field value for a given RMA size.
+   Assumes POWER7. */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+	switch (rma_size) {
+	case 32ul << 20:	/* 32 MB */
+		return 8;
+	case 64ul << 20:	/* 64 MB */
+		return 3;
+	case 128ul << 20:	/* 128 MB */
+		return 7;
+	case 256ul << 20:	/* 256 MB */
+		return 4;
+	case 1ul << 30:		/* 1 GB */
+		return 2;
+	case 16ul << 30:	/* 16 GB */
+		return 1;
+	case 256ul << 30:	/* 256 GB */
+		return 0;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Called at boot time while the bootmem allocator is active,
+ * to allocate contiguous physical memory for the real memory
+ * areas for guests.
+ */
+void kvm_rma_init(void)
+{
+	unsigned long i;
+	unsigned long j, npages;
+	void *rma;
+	struct page *pg;
+
+	/* Only do this on POWER7 in HV mode */
+	if (!cpu_has_feature(CPU_FTR_HVMODE_206))
+		return;
+
+	if (!kvm_rma_size || !kvm_rma_count)
+		return;
+
+	/* Check that the requested size is one supported in hardware */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return;
+	}
+
+	npages = kvm_rma_size >> PAGE_SHIFT;
+	rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info));
+	for (i = 0; i < kvm_rma_count; ++i) {
+		rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size);
+		pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma,
+			kvm_rma_size >> 20);
+		rma_info[i].base_virt = rma;
+		rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT;
+		rma_info[i].npages = npages;
+		list_add_tail(&rma_info[i].list, &free_rmas);
+		atomic_set(&rma_info[i].use_count, 0);
+
+		pg = pfn_to_page(rma_info[i].base_pfn);
+		for (j = 0; j < npages; ++j) {
+			atomic_inc(&pg->_count);
+			++pg;
+		}
+	}
+}
+
+struct kvmppc_rma_info *kvm_alloc_rma(void)
+{
+	struct kvmppc_rma_info *ri;
+
+	ri = NULL;
+	spin_lock(&rma_lock);
+	if (!list_empty(&free_rmas)) {
+		ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list);
+		list_del(&ri->list);
+		atomic_inc(&ri->use_count);
+	}
+	spin_unlock(&rma_lock);
+	return ri;
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_rma);
+
+void kvm_release_rma(struct kvmppc_rma_info *ri)
+{
+	if (atomic_dec_and_test(&ri->use_count)) {
+		spin_lock(&rma_lock);
+		list_add_tail(&ri->list, &free_rmas);
+		spin_unlock(&rma_lock);
+
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_release_rma);
+
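Note that kvm_rma_init() has to run while the bootmem allocator is
still active; the call site lives in platform setup code outside
arch/powerpc/kvm, so it does not appear in this diffstat.  A
hypothetical sketch of the hookup (the function name and exact
placement in early boot are assumptions, not code from the patch):

/* Hypothetical early-boot call site -- bootmem must still be live here. */
void __init early_setup_sketch(void)
{
	/* ... existing early platform setup ... */
	kvm_rma_init();	/* reserve kvm_rma_count RMAs of kvm_rma_size each */
	/* ... */
}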
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c9bf177b7cf..9ee223c3528 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -235,10 +235,10 @@ kvmppc_hv_entry:
 	bne	21b
 
 	/* Primary thread switches to guest partition. */
+	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
 	lwz	r6,VCPU_PTID(r4)
 	cmpwi	r6,0
 	bne	20f
-	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
 	ld	r6,KVM_SDR1(r9)
 	lwz	r7,KVM_LPID(r9)
 	li	r0,LPID_RSVD		/* switch to reserved LPID */
@@ -255,8 +255,18 @@ kvmppc_hv_entry:
 20:	lbz	r0,VCORE_IN_GUEST(r5)
 	cmpwi	r0,0
 	beq	20b
-10:	ld	r8,VCPU_LPCR(r4)
-	mtspr	SPRN_LPCR,r8
+
+	/* Set LPCR. Set the MER bit if there is a pending external irq. */
+10:	ld	r8,KVM_LPCR(r9)
+	ld	r0,VCPU_PENDING_EXC(r4)
+	li	r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
+	oris	r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+	and.	r0,r0,r7
+	beq	11f
+	ori	r8,r8,LPCR_MER
+11:	mtspr	SPRN_LPCR,r8
+	ld	r8,KVM_RMOR(r9)
+	mtspr	SPRN_RMOR,r8
 	isync
 
 	/* Check if HDEC expires soon */
@@ -464,7 +474,8 @@ hcall_real_cont:
 	/* Check for mediated interrupts (could be done earlier really ...) */
 	cmpwi	r12,BOOK3S_INTERRUPT_EXTERNAL
 	bne+	1f
-	ld	r5,VCPU_LPCR(r9)
+	ld	r5,VCPU_KVM(r9)
+	ld	r5,KVM_LPCR(r5)
 	andi.	r0,r11,MSR_EE
 	beq	1f
 	andi.	r0,r5,LPCR_MER
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c549664c98..72c506505fa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -211,6 +211,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_SMT:
 		r = threads_per_core;
 		break;
+	case KVM_CAP_PPC_RMA:
+		r = 1;
+		break;
 #endif
 	default:
 		r = 0;
@@ -673,6 +676,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
 		goto out;
 	}
+
+	case KVM_ALLOCATE_RMA: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_allocate_rma rma;
+
+		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+			r = -EFAULT;
+		break;
+	}
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 	default: