author    David Hildenbrand <dahi@linux.vnet.ibm.com>	2015-07-08 07:19:48 -0400
committer Christian Borntraeger <borntraeger@de.ibm.com>	2016-06-21 03:43:33 -0400
commit    a3508fbe9dc6dd3bece0c7bf889cc085a011738c (patch)
tree      f955df7a6d940b1e80170386857252f02a386d7f /arch/s390/kvm
parent    df9b2b4a4aa49f874f8507680a533369e4b9c378 (diff)
KVM: s390: vsie: initial support for nested virtualization
This patch adds basic support for nested virtualization on s390x, called VSIE (virtual SIE), and allows it to be used by the guest if the necessary facilities are supported by the hardware and enabled for the guest.

In order to make this work, we have to shadow the SIE control block provided by guest 2. To gain some performance, we reuse the same shadow blocks as much as possible. For now, we allow as many shadow blocks as we have VCPUs (that way, every VCPU can run the VSIE concurrently).

We have to watch out for the prefix getting unmapped from our shadow gmap and properly get the VCPU out of VSIE in that case, to fault the prefix pages back in. We use the PROG_REQUEST bit for that purpose.

This patch is based on an initial prototype by Tobias Elpelt.

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
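The PROG_REQUEST handshake mentioned above is the subtle part, so here is a minimal userspace sketch of the idea. This is not kernel code and not part of the patch; in the kernel these roles are played by kvm_s390_vsie_gmap_notifier()/prefix_unmapped_sync() and the SIE entry path, and a VCPU that is already inside SIE is additionally kicked via CPUSTAT_STOP_INT. Only the names PROG_REQUEST and PROG_IN_SIE are taken from the patch; everything else is illustrative:

/* vsie_handshake.c - illustrative sketch only; build with: cc -pthread vsie_handshake.c */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define PROG_REQUEST 0x1	/* "prefix unmapped, do not (re)enter SIE" */
#define PROG_IN_SIE  0x2	/* "currently executing the shadow SIE"    */

static atomic_uint prog;	/* stands in for scb_s.prog20/prog0c */

/* the VCPU: only enters "SIE" while no request is pending */
static void *vcpu_thread(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		if (atomic_load(&prog) & PROG_REQUEST) {
			/* fault the prefix back in, then allow SIE again (prefix_mapped) */
			atomic_fetch_and(&prog, ~PROG_REQUEST);
			continue;
		}
		atomic_fetch_or(&prog, PROG_IN_SIE);	/* entered SIE */
		/* ... guest 3 instructions run here ... */
		atomic_fetch_and(&prog, ~PROG_IN_SIE);	/* left SIE */
	}
	return NULL;
}

/* the gmap notifier: prefix_unmapped_sync() must not return while still in SIE */
static void *notifier_thread(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000; i++) {
		atomic_fetch_or(&prog, PROG_REQUEST);	/* prefix_unmapped() */
		while (atomic_load(&prog) & PROG_IN_SIE)
			sched_yield();			/* wait for the SIE exit */
	}
	return NULL;
}

int main(void)
{
	pthread_t vcpu, notifier;

	pthread_create(&vcpu, NULL, vcpu_thread, NULL);
	pthread_create(&notifier, NULL, notifier_thread, NULL);
	pthread_join(vcpu, NULL);
	pthread_join(notifier, NULL);
	puts("handshake done");
	return 0;
}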
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r--	arch/s390/kvm/Makefile    |   2
-rw-r--r--	arch/s390/kvm/kvm-s390.c  |  15
-rw-r--r--	arch/s390/kvm/kvm-s390.h  |   7
-rw-r--r--	arch/s390/kvm/priv.c      |   1
-rw-r--r--	arch/s390/kvm/vsie.c      | 755
5 files changed, 779 insertions(+), 1 deletion(-)
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 82e73e2b953d..09a9e6dfc09f 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o
+kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o
 
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index a890f7d20711..3fb124226e97 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -99,6 +99,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
99 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 99 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
100 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 100 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
101 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 101 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
102 { "instruction_sie", VCPU_STAT(instruction_sie) },
102 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 103 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
103 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 104 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
104 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 105 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -142,6 +143,7 @@ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS)
 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 
 static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
 debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
@@ -187,6 +189,8 @@ int kvm_arch_hardware_setup(void)
 {
 	gmap_notifier.notifier_call = kvm_gmap_notifier;
 	gmap_register_pte_notifier(&gmap_notifier);
+	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+	gmap_register_pte_notifier(&vsie_gmap_notifier);
 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 				       &kvm_clock_notifier);
 	return 0;
@@ -195,6 +199,7 @@ int kvm_arch_hardware_setup(void)
 void kvm_arch_hardware_unsetup(void)
 {
 	gmap_unregister_pte_notifier(&gmap_notifier);
+	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 					 &kvm_clock_notifier);
 }
@@ -252,6 +257,14 @@ static void kvm_s390_cpu_feat_init(void)
 
 	if (MACHINE_HAS_ESOP)
 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+	/*
+	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+	 */
+	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+	    !test_facility(3))
+		return;
+	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 }
 
 int kvm_arch_init(void *opaque)
@@ -1406,6 +1419,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
+	kvm_s390_vsie_init(kvm);
 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
 	return 0;
@@ -1463,6 +1477,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	gmap_remove(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
+	kvm_s390_vsie_destroy(kvm);
 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 52aa47e112d8..b137fbaac91c 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -252,6 +252,13 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 
+/* implemented in vsie.c */
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+				 unsigned long end);
+void kvm_s390_vsie_init(struct kvm *kvm);
+void kvm_s390_vsie_destroy(struct kvm *kvm);
+
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3db3be139992..c77ad2dc334f 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -719,6 +719,7 @@ static const intercept_handler_t b2_handlers[256] = {
 	[0x10] = handle_set_prefix,
 	[0x11] = handle_store_prefix,
 	[0x12] = handle_store_cpu_address,
+	[0x14] = kvm_s390_handle_vsie,
 	[0x21] = handle_ipte_interlock,
 	[0x29] = handle_iske,
 	[0x2a] = handle_rrbe,
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
new file mode 100644
index 000000000000..747d4f900155
--- /dev/null
+++ b/arch/s390/kvm/vsie.c
@@ -0,0 +1,755 @@
+/*
+ * kvm nested virtualization support for s390x
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/vmalloc.h>
+#include <linux/kvm_host.h>
+#include <linux/bug.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <asm/gmap.h>
+#include <asm/mmu_context.h>
+#include <asm/sclp.h>
+#include <asm/nmi.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+struct vsie_page {
+	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
+	/* the pinned original scb */
+	struct kvm_s390_sie_block *scb_o;	/* 0x0200 */
+	/* the shadow gmap in use by the vsie_page */
+	struct gmap *gmap;			/* 0x0208 */
+	__u8 reserved[0x1000 - 0x0210];		/* 0x0210 */
+} __packed;
+
+/* trigger a validity icpt for the given scb */
+static int set_validity_icpt(struct kvm_s390_sie_block *scb,
+			     __u16 reason_code)
+{
+	scb->ipa = 0x1000;
+	scb->ipb = ((__u32) reason_code) << 16;
+	scb->icptcode = ICPT_VALIDITY;
+	return 1;
+}
+
+/* mark the prefix as unmapped, this will block the VSIE */
+static void prefix_unmapped(struct vsie_page *vsie_page)
+{
+	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* mark the prefix as unmapped and wait until the VSIE has been left */
+static void prefix_unmapped_sync(struct vsie_page *vsie_page)
+{
+	prefix_unmapped(vsie_page);
+	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
+	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+		cpu_relax();
+}
+
+/* mark the prefix as mapped, this will allow the VSIE to run */
+static void prefix_mapped(struct vsie_page *vsie_page)
+{
+	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+
+/* copy the updated intervention request bits into the shadow scb */
+static void update_intervention_requests(struct vsie_page *vsie_page)
+{
+	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
+	int cpuflags;
+
+	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
+	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
+	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
+}
+
+/* shadow (filter and validate) the cpuflags */
+static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);
+
+	/* we don't allow ESA/390 guests */
+	if (!(cpuflags & CPUSTAT_ZARCH))
+		return set_validity_icpt(scb_s, 0x0001U);
+
+	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
+		return set_validity_icpt(scb_s, 0x0001U);
+	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
+		return set_validity_icpt(scb_s, 0x0007U);
+
+	/* intervention requests will be set later */
+	newflags = CPUSTAT_ZARCH;
+
+	atomic_set(&scb_s->cpuflags, newflags);
+	return 0;
+}
+
+/* unshadow the scb, copying parameters back to the real scb */
+static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+
+	/* interception */
+	scb_o->icptcode = scb_s->icptcode;
+	scb_o->icptstatus = scb_s->icptstatus;
+	scb_o->ipa = scb_s->ipa;
+	scb_o->ipb = scb_s->ipb;
+	scb_o->gbea = scb_s->gbea;
+
+	/* timer */
+	scb_o->cputm = scb_s->cputm;
+	scb_o->ckc = scb_s->ckc;
+	scb_o->todpr = scb_s->todpr;
+
+	/* guest state */
+	scb_o->gpsw = scb_s->gpsw;
+	scb_o->gg14 = scb_s->gg14;
+	scb_o->gg15 = scb_s->gg15;
+	memcpy(scb_o->gcr, scb_s->gcr, 128);
+	scb_o->pp = scb_s->pp;
+
+	/* interrupt intercept */
+	switch (scb_s->icptcode) {
+	case ICPT_PROGI:
+	case ICPT_INSTPROGI:
+	case ICPT_EXTINT:
+		memcpy((void *)((u64)scb_o + 0xc0),
+		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
+		break;
+	case ICPT_PARTEXEC:
+		/* MVPG only */
+		memcpy((void *)((u64)scb_o + 0xc0),
+		       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
+		break;
+	}
+
+	if (scb_s->ihcpu != 0xffffU)
+		scb_o->ihcpu = scb_s->ihcpu;
+}
+
+/*
+ * Setup the shadow scb by copying and checking the relevant parts of the g2
+ * provided scb.
+ *
+ * Returns: - 0 if the scb has been shadowed
+ *          - > 0 if control has to be given to guest 2
+ */
+static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	int rc;
+
+	/* make sure we don't have any leftovers when reusing the scb */
+	scb_s->icptcode = 0;
+	scb_s->eca = 0;
+	scb_s->ecb = 0;
+	scb_s->ecb2 = 0;
+	scb_s->ecb3 = 0;
+	scb_s->ecd = 0;
+
+	rc = prepare_cpuflags(vcpu, vsie_page);
+	if (rc)
+		goto out;
+
+	/* timer */
+	scb_s->cputm = scb_o->cputm;
+	scb_s->ckc = scb_o->ckc;
+	scb_s->todpr = scb_o->todpr;
+	scb_s->epoch = scb_o->epoch;
+
+	/* guest state */
+	scb_s->gpsw = scb_o->gpsw;
+	scb_s->gg14 = scb_o->gg14;
+	scb_s->gg15 = scb_o->gg15;
+	memcpy(scb_s->gcr, scb_o->gcr, 128);
+	scb_s->pp = scb_o->pp;
+
+	/* interception / execution handling */
+	scb_s->gbea = scb_o->gbea;
+	scb_s->lctl = scb_o->lctl;
+	scb_s->svcc = scb_o->svcc;
+	scb_s->ictl = scb_o->ictl;
+	/*
+	 * SKEY handling functions can't deal with false setting of PTE invalid
+	 * bits. Therefore we cannot provide interpretation and would later
+	 * have to provide our own emulation handlers.
+	 */
+	scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+	scb_s->icpua = scb_o->icpua;
+
+	/* SIE will do mso/msl validity and exception checks for us */
+	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
+	scb_s->mso = scb_o->mso & 0xfffffffffff00000UL;
+	scb_s->prefix = scb_o->prefix;
+
+	/* We definitely have to flush the TLB if this scb never ran */
+	if (scb_s->ihcpu != 0xffffU)
+		scb_s->ihcpu = scb_o->ihcpu;
+
+	/* MVPG and Protection Exception Interpretation are always available */
+	scb_s->eca |= scb_o->eca & 0x01002000U;
+
+out:
+	if (rc)
+		unshadow_scb(vcpu, vsie_page);
+	return rc;
+}
+
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+				 unsigned long end)
+{
+	struct kvm *kvm = gmap->private;
+	struct vsie_page *cur;
+	unsigned long prefix;
+	struct page *page;
+	int i;
+
+	if (!gmap_is_shadow(gmap))
+		return;
+	if (start >= 1UL << 31)
+		/* We are only interested in prefix pages */
+		return;
+
+	/*
+	 * Only new shadow blocks are added to the list during runtime,
+	 * therefore we can safely reference them all the time.
+	 */
+	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+		page = READ_ONCE(kvm->arch.vsie.pages[i]);
+		if (!page)
+			continue;
+		cur = page_to_virt(page);
+		if (READ_ONCE(cur->gmap) != gmap)
+			continue;
+		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
+		/* with mso/msl, the prefix lies at an offset */
+		prefix += cur->scb_s.mso;
+		if (prefix <= end && start <= prefix + PAGE_SIZE - 1)
+			prefix_unmapped_sync(cur);
+	}
+}
+
+/*
+ * Map the first prefix page.
+ *
+ * The prefix will be protected, a gmap notifier will inform about unmaps.
+ * The shadow scb must not be executed until the prefix is remapped, this is
+ * guaranteed by properly handling PROG_REQUEST.
+ *
+ * Returns: - 0 if successfully mapped or already mapped
+ *          - > 0 if control has to be given to guest 2
+ *          - -EAGAIN if the caller can retry immediately
+ *          - -ENOMEM if out of memory
+ */
+static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+	int rc;
+
+	/* mark it as mapped so we can catch any concurrent unmappers */
+	prefix_mapped(vsie_page);
+
+	/* with mso/msl, the prefix lies at offset *mso* */
+	prefix += scb_s->mso;
+
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+	/*
+	 * We don't have to mprotect, we will be called for all unshadows.
+	 * SIE will detect if protection applies and trigger a validity.
+	 */
+	if (rc)
+		prefix_unmapped(vsie_page);
+	if (rc > 0 || rc == -EFAULT)
+		rc = set_validity_icpt(scb_s, 0x0037U);
+	return rc;
+}
+
+/*
+ * Pin the guest page given by gpa and set hpa to the pinned host address.
+ * Will always be pinned writable.
+ *
+ * Returns: - 0 on success
+ *          - -EINVAL if the gpa is not valid guest storage
+ *          - -ENOMEM if out of memory
+ */
+static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
+{
+	struct page *page;
+	hva_t hva;
+	int rc;
+
+	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+	if (kvm_is_error_hva(hva))
+		return -EINVAL;
+	rc = get_user_pages_fast(hva, 1, 1, &page);
+	if (rc < 0)
+		return rc;
+	else if (rc != 1)
+		return -ENOMEM;
+	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
+	return 0;
+}
+
+/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
+static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
+{
+	struct page *page;
+
+	page = virt_to_page(hpa);
+	set_page_dirty_lock(page);
+	put_page(page);
+	/* mark the page always as dirty for migration */
+	mark_page_dirty(kvm, gpa_to_gfn(gpa));
+}
+
+/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
+static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	hpa_t hpa;
+	gpa_t gpa;
+
+	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
+	if (hpa) {
+		gpa = scb_o->scaol & ~0xfUL;
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		scb_s->scaol = 0;
+		scb_s->scaoh = 0;
+	}
+}
+
+/*
+ * Instead of shadowing some blocks, we can simply forward them because the
+ * addresses in the scb are 64 bit long.
+ *
+ * This works as long as the data lies in one page. If blocks ever exceed one
+ * page, we have to fall back to shadowing.
+ *
+ * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
+ * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
+ *
+ * Returns: - 0 if all blocks were pinned.
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	hpa_t hpa;
+	gpa_t gpa;
+	int rc = 0;
+
+	gpa = scb_o->scaol & ~0xfUL;
+	if (gpa) {
+		if (!(gpa & ~0x1fffUL))
+			rc = set_validity_icpt(scb_s, 0x0038U);
+		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
+			rc = set_validity_icpt(scb_s, 0x0011U);
+		else if ((gpa & PAGE_MASK) !=
+			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
+			rc = set_validity_icpt(scb_s, 0x003bU);
+		if (!rc) {
+			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+			if (rc == -EINVAL)
+				rc = set_validity_icpt(scb_s, 0x0034U);
+		}
+		if (rc)
+			goto unpin;
+		scb_s->scaoh = (u32)((u64)hpa >> 32);
+		scb_s->scaol = (u32)(u64)hpa;
+	}
+	return 0;
+unpin:
+	unpin_blocks(vcpu, vsie_page);
+	return rc;
+}
+
+/* unpin the scb provided by guest 2, marking it as dirty */
+static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+		      gpa_t gpa)
+{
+	hpa_t hpa = (hpa_t) vsie_page->scb_o;
+
+	if (hpa)
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+	vsie_page->scb_o = NULL;
+}
+
+/*
+ * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
+ *
+ * Returns: - 0 if the scb was pinned.
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+		   gpa_t gpa)
+{
+	hpa_t hpa;
+	int rc;
+
+	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+	if (rc == -EINVAL) {
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (!rc)
+			rc = 1;
+	}
+	if (!rc)
+		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	return rc;
+}
+
+/*
+ * Inject a fault into guest 2.
+ *
+ * Returns: - > 0 if control has to be given to guest 2
+ *            < 0 if an error occurred during injection.
+ */
+static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
+			bool write_flag)
+{
+	struct kvm_s390_pgm_info pgm = {
+		.code = code,
+		.trans_exc_code =
+			/* 0-51: virtual address */
+			(vaddr & 0xfffffffffffff000UL) |
+			/* 52-53: store / fetch */
+			(((unsigned int) !write_flag) + 1) << 10,
+			/* 62-63: asce id (always primary == 0) */
+		.exc_access_id = 0, /* always primary */
+		.op_access_id = 0, /* not MVPG */
+	};
+	int rc;
+
+	if (code == PGM_PROTECTION)
+		pgm.trans_exc_code |= 0x4UL;
+
+	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
+	return rc ? rc : 1;
+}
+
+/*
+ * Handle a fault during vsie execution on a gmap shadow.
+ *
+ * Returns: - 0 if the fault was resolved
+ *          - > 0 if control has to be given to guest 2
+ *          - < 0 if an error occurred
+ */
+static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	int rc;
+
+	if (current->thread.gmap_int_code == PGM_PROTECTION)
+		/* we can directly forward all protection exceptions */
+		return inject_fault(vcpu, PGM_PROTECTION,
+				    current->thread.gmap_addr, 1);
+
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+				   current->thread.gmap_addr);
+	if (rc > 0) {
+		rc = inject_fault(vcpu, rc,
+				  current->thread.gmap_addr,
+				  current->thread.gmap_write_flag);
+	}
+	return rc;
+}
+
+static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
+{
+	vsie_page->scb_s.icptcode = 0;
+}
+
+/*
+ * Run the vsie on a shadow scb and a shadow gmap, without any further
+ * sanity checks, handling SIE faults.
+ *
+ * Returns: - 0 everything went fine
+ *          - > 0 if control has to be given to guest 2
+ *          - < 0 if an error occurred
+ */
+static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	int rc;
+
+	if (need_resched())
+		schedule();
+	if (test_cpu_flag(CIF_MCCK_PENDING))
+		s390_handle_mcck();
+
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	local_irq_disable();
+	kvm_guest_enter();
+	local_irq_enable();
+
+	rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+
+	local_irq_disable();
+	kvm_guest_exit();
+	local_irq_enable();
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	if (rc > 0)
+		rc = 0; /* we could still have an icpt */
+	else if (rc == -EFAULT)
+		return handle_fault(vcpu, vsie_page);
+
+	switch (scb_s->icptcode) {
+	case ICPT_STOP:
+		/* stop not requested by g2 - must have been a kick */
+		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
+			clear_vsie_icpt(vsie_page);
+		break;
+	case ICPT_VALIDITY:
+		if ((scb_s->ipa & 0xf000) != 0xf000)
+			scb_s->ipa += 0x1000;
+		break;
+	}
+	return rc;
+}
+
+static void release_gmap_shadow(struct vsie_page *vsie_page)
+{
+	if (vsie_page->gmap)
+		gmap_put(vsie_page->gmap);
+	WRITE_ONCE(vsie_page->gmap, NULL);
+}
+
+static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+			       struct vsie_page *vsie_page)
+{
+	unsigned long asce;
+	union ctlreg0 cr0;
+	struct gmap *gmap;
+	int edat;
+
+	asce = vcpu->arch.sie_block->gcr[1];
+	cr0.val = vcpu->arch.sie_block->gcr[0];
+	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat += edat && test_kvm_facility(vcpu->kvm, 78);
+
+	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+	if (IS_ERR(gmap))
+		return PTR_ERR(gmap);
+	gmap->private = vcpu->kvm;
+	WRITE_ONCE(vsie_page->gmap, gmap);
+	return 0;
+}
+
+/*
+ * Run the vsie on a shadowed scb, managing the gmap shadow, handling
+ * prefix pages and faults.
+ *
+ * Returns: - 0 if no errors occurred
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	int rc = 0;
+
+	while (1) {
+		rc = acquire_gmap_shadow(vcpu, vsie_page);
+		if (!rc)
+			rc = map_prefix(vcpu, vsie_page);
+		if (!rc) {
+			gmap_enable(vsie_page->gmap);
+			update_intervention_requests(vsie_page);
+			rc = do_vsie_run(vcpu, vsie_page);
+			gmap_enable(vcpu->arch.gmap);
+		}
+		release_gmap_shadow(vsie_page);
+
+		if (rc == -EAGAIN)
+			rc = 0;
+		if (rc || scb_s->icptcode || signal_pending(current) ||
+		    kvm_s390_vcpu_has_irq(vcpu, 0))
+			break;
+	};
+
+	if (rc == -EFAULT) {
+		/*
+		 * Addressing exceptions are always presented as intercepts.
+		 * As addressing exceptions are suppressing and our guest 3 PSW
+		 * points at the responsible instruction, we have to
+		 * forward the PSW and set the ilc. If we can't read the guest 3
+		 * instruction, we can use an arbitrary ilc. Let's always use
+		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
+		 * memory. (we could also fake the shadow so the hardware
+		 * handles it).
+		 */
+		scb_s->icptcode = ICPT_PROGI;
+		scb_s->iprcc = PGM_ADDRESSING;
+		scb_s->pgmilc = 4;
+		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
+	}
+	return rc;
+}
+
+/*
+ * Get or create a vsie page for a scb address.
+ *
+ * Returns: - address of a vsie page (cached or new one)
+ *          - NULL if the same scb address is already used by another VCPU
+ *          - ERR_PTR(-ENOMEM) if out of memory
+ */
+static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
+{
+	struct vsie_page *vsie_page;
+	struct page *page;
+	int nr_vcpus;
+
+	rcu_read_lock();
+	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+	rcu_read_unlock();
+	if (page) {
+		if (page_ref_inc_return(page) == 2)
+			return page_to_virt(page);
+		page_ref_dec(page);
+	}
+
+	/*
+	 * We want at least #online_vcpus shadows, so every VCPU can execute
+	 * the VSIE in parallel.
+	 */
+	nr_vcpus = atomic_read(&kvm->online_vcpus);
+
+	mutex_lock(&kvm->arch.vsie.mutex);
+	if (kvm->arch.vsie.page_count < nr_vcpus) {
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page) {
+			mutex_unlock(&kvm->arch.vsie.mutex);
+			return ERR_PTR(-ENOMEM);
+		}
+		page_ref_inc(page);
+		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+		kvm->arch.vsie.page_count++;
+	} else {
+		/* reuse an existing entry that belongs to nobody */
+		while (true) {
+			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+			if (page_ref_inc_return(page) == 2)
+				break;
+			page_ref_dec(page);
+			kvm->arch.vsie.next++;
+			kvm->arch.vsie.next %= nr_vcpus;
+		}
+		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+	}
+	page->index = addr;
+	/* double use of the same address */
+	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
+		page_ref_dec(page);
+		mutex_unlock(&kvm->arch.vsie.mutex);
+		return NULL;
+	}
+	mutex_unlock(&kvm->arch.vsie.mutex);
+
+	vsie_page = page_to_virt(page);
+	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
+	vsie_page->scb_s.ihcpu = 0xffffU;
+	return vsie_page;
+}
+
+/* put a vsie page acquired via get_vsie_page */
+static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
+{
+	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
+
+	page_ref_dec(page);
+}
+
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
+{
+	struct vsie_page *vsie_page;
+	unsigned long scb_addr;
+	int rc;
+
+	vcpu->stat.instruction_sie++;
+	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
+		return -EOPNOTSUPP;
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
+	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
+
+	/* 512 byte alignment */
+	if (unlikely(scb_addr & 0x1ffUL))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
+		return 0;
+
+	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
+	if (IS_ERR(vsie_page))
+		return PTR_ERR(vsie_page);
+	else if (!vsie_page)
+		/* double use of sie control block - simply do nothing */
+		return 0;
+
+	rc = pin_scb(vcpu, vsie_page, scb_addr);
+	if (rc)
+		goto out_put;
+	rc = shadow_scb(vcpu, vsie_page);
+	if (rc)
+		goto out_unpin_scb;
+	rc = pin_blocks(vcpu, vsie_page);
+	if (rc)
+		goto out_unshadow;
+	rc = vsie_run(vcpu, vsie_page);
+	unpin_blocks(vcpu, vsie_page);
+out_unshadow:
+	unshadow_scb(vcpu, vsie_page);
+out_unpin_scb:
+	unpin_scb(vcpu, vsie_page, scb_addr);
+out_put:
+	put_vsie_page(vcpu->kvm, vsie_page);
+
+	return rc < 0 ? rc : 0;
+}
+
+/* Init the vsie data structures. To be called when a vm is initialized. */
+void kvm_s390_vsie_init(struct kvm *kvm)
+{
+	mutex_init(&kvm->arch.vsie.mutex);
+	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
+}
+
+/* Destroy the vsie data structures. To be called when a vm is destroyed. */
+void kvm_s390_vsie_destroy(struct kvm *kvm)
+{
+	struct page *page;
+	int i;
+
+	mutex_lock(&kvm->arch.vsie.mutex);
+	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+		page = kvm->arch.vsie.pages[i];
+		kvm->arch.vsie.pages[i] = NULL;
+		/* free the radix tree entry */
+		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+		__free_page(page);
+	}
+	kvm->arch.vsie.page_count = 0;
+	mutex_unlock(&kvm->arch.vsie.mutex);
+}