diff options
-rw-r--r-- | drivers/kvm/Kconfig | 1 | ||||
-rw-r--r-- | drivers/kvm/Makefile | 2 | ||||
-rw-r--r-- | drivers/kvm/i8259.c | 450 | ||||
-rw-r--r-- | drivers/kvm/ioapic.c | 388 | ||||
-rw-r--r-- | drivers/kvm/irq.c | 98 | ||||
-rw-r--r-- | drivers/kvm/irq.h | 165 | ||||
-rw-r--r-- | drivers/kvm/kvm.h | 201 | ||||
-rw-r--r-- | drivers/kvm/kvm_main.c | 1486 | ||||
-rw-r--r-- | drivers/kvm/kvm_svm.h | 3 | ||||
-rw-r--r-- | drivers/kvm/lapic.c | 1064 | ||||
-rw-r--r-- | drivers/kvm/mmu.c | 51 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 84 | ||||
-rw-r--r-- | drivers/kvm/svm.c | 1046 | ||||
-rw-r--r-- | drivers/kvm/vmx.c | 1034 | ||||
-rw-r--r-- | drivers/kvm/vmx.h | 73 | ||||
-rw-r--r-- | drivers/kvm/x86_emulate.c | 411 | ||||
-rw-r--r-- | drivers/kvm/x86_emulate.h | 20 | ||||
-rw-r--r-- | include/asm-x86/io_apic_32.h | 16 | ||||
-rw-r--r-- | include/asm-x86/processor-flags.h | 2 | ||||
-rw-r--r-- | include/linux/kvm.h | 128 |
20 files changed, 4848 insertions, 1875 deletions
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 0a419a0de603..8749fa4ffcee 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig | |||
@@ -17,6 +17,7 @@ if VIRTUALIZATION | |||
17 | config KVM | 17 | config KVM |
18 | tristate "Kernel-based Virtual Machine (KVM) support" | 18 | tristate "Kernel-based Virtual Machine (KVM) support" |
19 | depends on X86 && EXPERIMENTAL | 19 | depends on X86 && EXPERIMENTAL |
20 | select PREEMPT_NOTIFIERS | ||
20 | select ANON_INODES | 21 | select ANON_INODES |
21 | ---help--- | 22 | ---help--- |
22 | Support hosting fully virtualized guest machines using hardware | 23 | Support hosting fully virtualized guest machines using hardware |
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile index c0a789fa9d65..e5a8f4d3e973 100644 --- a/drivers/kvm/Makefile +++ b/drivers/kvm/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for Kernel-based Virtual Machine module | 2 | # Makefile for Kernel-based Virtual Machine module |
3 | # | 3 | # |
4 | 4 | ||
5 | kvm-objs := kvm_main.o mmu.o x86_emulate.o | 5 | kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o |
6 | obj-$(CONFIG_KVM) += kvm.o | 6 | obj-$(CONFIG_KVM) += kvm.o |
7 | kvm-intel-objs = vmx.o | 7 | kvm-intel-objs = vmx.o |
8 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o | 8 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o |
diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c new file mode 100644 index 000000000000..a679157bc599 --- /dev/null +++ b/drivers/kvm/i8259.c | |||
@@ -0,0 +1,450 @@ | |||
1 | /* | ||
2 | * 8259 interrupt controller emulation | ||
3 | * | ||
4 | * Copyright (c) 2003-2004 Fabrice Bellard | ||
5 | * Copyright (c) 2007 Intel Corporation | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
8 | * of this software and associated documentation files (the "Software"), to deal | ||
9 | * in the Software without restriction, including without limitation the rights | ||
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
11 | * copies of the Software, and to permit persons to whom the Software is | ||
12 | * furnished to do so, subject to the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice shall be included in | ||
15 | * all copies or substantial portions of the Software. | ||
16 | * | ||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
23 | * THE SOFTWARE. | ||
24 | * Authors: | ||
25 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> | ||
26 | * Port from Qemu. | ||
27 | */ | ||
28 | #include <linux/mm.h> | ||
29 | #include "irq.h" | ||
30 | |||
31 | /* | ||
32 | * set irq level. If an edge is detected, then the IRR is set to 1 | ||
33 | */ | ||
34 | static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) | ||
35 | { | ||
36 | int mask; | ||
37 | mask = 1 << irq; | ||
38 | if (s->elcr & mask) /* level triggered */ | ||
39 | if (level) { | ||
40 | s->irr |= mask; | ||
41 | s->last_irr |= mask; | ||
42 | } else { | ||
43 | s->irr &= ~mask; | ||
44 | s->last_irr &= ~mask; | ||
45 | } | ||
46 | else /* edge triggered */ | ||
47 | if (level) { | ||
48 | if ((s->last_irr & mask) == 0) | ||
49 | s->irr |= mask; | ||
50 | s->last_irr |= mask; | ||
51 | } else | ||
52 | s->last_irr &= ~mask; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * return the highest priority found in mask (highest = smallest | ||
57 | * number). Return 8 if no irq | ||
58 | */ | ||
59 | static inline int get_priority(struct kvm_kpic_state *s, int mask) | ||
60 | { | ||
61 | int priority; | ||
62 | if (mask == 0) | ||
63 | return 8; | ||
64 | priority = 0; | ||
65 | while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0) | ||
66 | priority++; | ||
67 | return priority; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * return the pic wanted interrupt. return -1 if none | ||
72 | */ | ||
73 | static int pic_get_irq(struct kvm_kpic_state *s) | ||
74 | { | ||
75 | int mask, cur_priority, priority; | ||
76 | |||
77 | mask = s->irr & ~s->imr; | ||
78 | priority = get_priority(s, mask); | ||
79 | if (priority == 8) | ||
80 | return -1; | ||
81 | /* | ||
82 | * compute current priority. If special fully nested mode on the | ||
83 | * master, the IRQ coming from the slave is not taken into account | ||
84 | * for the priority computation. | ||
85 | */ | ||
86 | mask = s->isr; | ||
87 | if (s->special_fully_nested_mode && s == &s->pics_state->pics[0]) | ||
88 | mask &= ~(1 << 2); | ||
89 | cur_priority = get_priority(s, mask); | ||
90 | if (priority < cur_priority) | ||
91 | /* | ||
92 | * higher priority found: an irq should be generated | ||
93 | */ | ||
94 | return (priority + s->priority_add) & 7; | ||
95 | else | ||
96 | return -1; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * raise irq to CPU if necessary. must be called every time the active | ||
101 | * irq may change | ||
102 | */ | ||
103 | static void pic_update_irq(struct kvm_pic *s) | ||
104 | { | ||
105 | int irq2, irq; | ||
106 | |||
107 | irq2 = pic_get_irq(&s->pics[1]); | ||
108 | if (irq2 >= 0) { | ||
109 | /* | ||
110 | * if irq request by slave pic, signal master PIC | ||
111 | */ | ||
112 | pic_set_irq1(&s->pics[0], 2, 1); | ||
113 | pic_set_irq1(&s->pics[0], 2, 0); | ||
114 | } | ||
115 | irq = pic_get_irq(&s->pics[0]); | ||
116 | if (irq >= 0) | ||
117 | s->irq_request(s->irq_request_opaque, 1); | ||
118 | else | ||
119 | s->irq_request(s->irq_request_opaque, 0); | ||
120 | } | ||
121 | |||
122 | void kvm_pic_update_irq(struct kvm_pic *s) | ||
123 | { | ||
124 | pic_update_irq(s); | ||
125 | } | ||
126 | |||
127 | void kvm_pic_set_irq(void *opaque, int irq, int level) | ||
128 | { | ||
129 | struct kvm_pic *s = opaque; | ||
130 | |||
131 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | ||
132 | pic_update_irq(s); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * acknowledge interrupt 'irq' | ||
137 | */ | ||
138 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | ||
139 | { | ||
140 | if (s->auto_eoi) { | ||
141 | if (s->rotate_on_auto_eoi) | ||
142 | s->priority_add = (irq + 1) & 7; | ||
143 | } else | ||
144 | s->isr |= (1 << irq); | ||
145 | /* | ||
146 | * We don't clear a level sensitive interrupt here | ||
147 | */ | ||
148 | if (!(s->elcr & (1 << irq))) | ||
149 | s->irr &= ~(1 << irq); | ||
150 | } | ||
151 | |||
152 | int kvm_pic_read_irq(struct kvm_pic *s) | ||
153 | { | ||
154 | int irq, irq2, intno; | ||
155 | |||
156 | irq = pic_get_irq(&s->pics[0]); | ||
157 | if (irq >= 0) { | ||
158 | pic_intack(&s->pics[0], irq); | ||
159 | if (irq == 2) { | ||
160 | irq2 = pic_get_irq(&s->pics[1]); | ||
161 | if (irq2 >= 0) | ||
162 | pic_intack(&s->pics[1], irq2); | ||
163 | else | ||
164 | /* | ||
165 | * spurious IRQ on slave controller | ||
166 | */ | ||
167 | irq2 = 7; | ||
168 | intno = s->pics[1].irq_base + irq2; | ||
169 | irq = irq2 + 8; | ||
170 | } else | ||
171 | intno = s->pics[0].irq_base + irq; | ||
172 | } else { | ||
173 | /* | ||
174 | * spurious IRQ on host controller | ||
175 | */ | ||
176 | irq = 7; | ||
177 | intno = s->pics[0].irq_base + irq; | ||
178 | } | ||
179 | pic_update_irq(s); | ||
180 | |||
181 | return intno; | ||
182 | } | ||
183 | |||
184 | static void pic_reset(void *opaque) | ||
185 | { | ||
186 | struct kvm_kpic_state *s = opaque; | ||
187 | |||
188 | s->last_irr = 0; | ||
189 | s->irr = 0; | ||
190 | s->imr = 0; | ||
191 | s->isr = 0; | ||
192 | s->priority_add = 0; | ||
193 | s->irq_base = 0; | ||
194 | s->read_reg_select = 0; | ||
195 | s->poll = 0; | ||
196 | s->special_mask = 0; | ||
197 | s->init_state = 0; | ||
198 | s->auto_eoi = 0; | ||
199 | s->rotate_on_auto_eoi = 0; | ||
200 | s->special_fully_nested_mode = 0; | ||
201 | s->init4 = 0; | ||
202 | } | ||
203 | |||
204 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | ||
205 | { | ||
206 | struct kvm_kpic_state *s = opaque; | ||
207 | int priority, cmd, irq; | ||
208 | |||
209 | addr &= 1; | ||
210 | if (addr == 0) { | ||
211 | if (val & 0x10) { | ||
212 | pic_reset(s); /* init */ | ||
213 | /* | ||
214 | * deassert a pending interrupt | ||
215 | */ | ||
216 | s->pics_state->irq_request(s->pics_state-> | ||
217 | irq_request_opaque, 0); | ||
218 | s->init_state = 1; | ||
219 | s->init4 = val & 1; | ||
220 | if (val & 0x02) | ||
221 | printk(KERN_ERR "single mode not supported"); | ||
222 | if (val & 0x08) | ||
223 | printk(KERN_ERR | ||
224 | "level sensitive irq not supported"); | ||
225 | } else if (val & 0x08) { | ||
226 | if (val & 0x04) | ||
227 | s->poll = 1; | ||
228 | if (val & 0x02) | ||
229 | s->read_reg_select = val & 1; | ||
230 | if (val & 0x40) | ||
231 | s->special_mask = (val >> 5) & 1; | ||
232 | } else { | ||
233 | cmd = val >> 5; | ||
234 | switch (cmd) { | ||
235 | case 0: | ||
236 | case 4: | ||
237 | s->rotate_on_auto_eoi = cmd >> 2; | ||
238 | break; | ||
239 | case 1: /* end of interrupt */ | ||
240 | case 5: | ||
241 | priority = get_priority(s, s->isr); | ||
242 | if (priority != 8) { | ||
243 | irq = (priority + s->priority_add) & 7; | ||
244 | s->isr &= ~(1 << irq); | ||
245 | if (cmd == 5) | ||
246 | s->priority_add = (irq + 1) & 7; | ||
247 | pic_update_irq(s->pics_state); | ||
248 | } | ||
249 | break; | ||
250 | case 3: | ||
251 | irq = val & 7; | ||
252 | s->isr &= ~(1 << irq); | ||
253 | pic_update_irq(s->pics_state); | ||
254 | break; | ||
255 | case 6: | ||
256 | s->priority_add = (val + 1) & 7; | ||
257 | pic_update_irq(s->pics_state); | ||
258 | break; | ||
259 | case 7: | ||
260 | irq = val & 7; | ||
261 | s->isr &= ~(1 << irq); | ||
262 | s->priority_add = (irq + 1) & 7; | ||
263 | pic_update_irq(s->pics_state); | ||
264 | break; | ||
265 | default: | ||
266 | break; /* no operation */ | ||
267 | } | ||
268 | } | ||
269 | } else | ||
270 | switch (s->init_state) { | ||
271 | case 0: /* normal mode */ | ||
272 | s->imr = val; | ||
273 | pic_update_irq(s->pics_state); | ||
274 | break; | ||
275 | case 1: | ||
276 | s->irq_base = val & 0xf8; | ||
277 | s->init_state = 2; | ||
278 | break; | ||
279 | case 2: | ||
280 | if (s->init4) | ||
281 | s->init_state = 3; | ||
282 | else | ||
283 | s->init_state = 0; | ||
284 | break; | ||
285 | case 3: | ||
286 | s->special_fully_nested_mode = (val >> 4) & 1; | ||
287 | s->auto_eoi = (val >> 1) & 1; | ||
288 | s->init_state = 0; | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | |||
293 | static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) | ||
294 | { | ||
295 | int ret; | ||
296 | |||
297 | ret = pic_get_irq(s); | ||
298 | if (ret >= 0) { | ||
299 | if (addr1 >> 7) { | ||
300 | s->pics_state->pics[0].isr &= ~(1 << 2); | ||
301 | s->pics_state->pics[0].irr &= ~(1 << 2); | ||
302 | } | ||
303 | s->irr &= ~(1 << ret); | ||
304 | s->isr &= ~(1 << ret); | ||
305 | if (addr1 >> 7 || ret != 2) | ||
306 | pic_update_irq(s->pics_state); | ||
307 | } else { | ||
308 | ret = 0x07; | ||
309 | pic_update_irq(s->pics_state); | ||
310 | } | ||
311 | |||
312 | return ret; | ||
313 | } | ||
314 | |||
315 | static u32 pic_ioport_read(void *opaque, u32 addr1) | ||
316 | { | ||
317 | struct kvm_kpic_state *s = opaque; | ||
318 | unsigned int addr; | ||
319 | int ret; | ||
320 | |||
321 | addr = addr1; | ||
322 | addr &= 1; | ||
323 | if (s->poll) { | ||
324 | ret = pic_poll_read(s, addr1); | ||
325 | s->poll = 0; | ||
326 | } else | ||
327 | if (addr == 0) | ||
328 | if (s->read_reg_select) | ||
329 | ret = s->isr; | ||
330 | else | ||
331 | ret = s->irr; | ||
332 | else | ||
333 | ret = s->imr; | ||
334 | return ret; | ||
335 | } | ||
336 | |||
337 | static void elcr_ioport_write(void *opaque, u32 addr, u32 val) | ||
338 | { | ||
339 | struct kvm_kpic_state *s = opaque; | ||
340 | s->elcr = val & s->elcr_mask; | ||
341 | } | ||
342 | |||
343 | static u32 elcr_ioport_read(void *opaque, u32 addr1) | ||
344 | { | ||
345 | struct kvm_kpic_state *s = opaque; | ||
346 | return s->elcr; | ||
347 | } | ||
348 | |||
349 | static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) | ||
350 | { | ||
351 | switch (addr) { | ||
352 | case 0x20: | ||
353 | case 0x21: | ||
354 | case 0xa0: | ||
355 | case 0xa1: | ||
356 | case 0x4d0: | ||
357 | case 0x4d1: | ||
358 | return 1; | ||
359 | default: | ||
360 | return 0; | ||
361 | } | ||
362 | } | ||
363 | |||
364 | static void picdev_write(struct kvm_io_device *this, | ||
365 | gpa_t addr, int len, const void *val) | ||
366 | { | ||
367 | struct kvm_pic *s = this->private; | ||
368 | unsigned char data = *(unsigned char *)val; | ||
369 | |||
370 | if (len != 1) { | ||
371 | if (printk_ratelimit()) | ||
372 | printk(KERN_ERR "PIC: non byte write\n"); | ||
373 | return; | ||
374 | } | ||
375 | switch (addr) { | ||
376 | case 0x20: | ||
377 | case 0x21: | ||
378 | case 0xa0: | ||
379 | case 0xa1: | ||
380 | pic_ioport_write(&s->pics[addr >> 7], addr, data); | ||
381 | break; | ||
382 | case 0x4d0: | ||
383 | case 0x4d1: | ||
384 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | ||
385 | break; | ||
386 | } | ||
387 | } | ||
388 | |||
389 | static void picdev_read(struct kvm_io_device *this, | ||
390 | gpa_t addr, int len, void *val) | ||
391 | { | ||
392 | struct kvm_pic *s = this->private; | ||
393 | unsigned char data = 0; | ||
394 | |||
395 | if (len != 1) { | ||
396 | if (printk_ratelimit()) | ||
397 | printk(KERN_ERR "PIC: non byte read\n"); | ||
398 | return; | ||
399 | } | ||
400 | switch (addr) { | ||
401 | case 0x20: | ||
402 | case 0x21: | ||
403 | case 0xa0: | ||
404 | case 0xa1: | ||
405 | data = pic_ioport_read(&s->pics[addr >> 7], addr); | ||
406 | break; | ||
407 | case 0x4d0: | ||
408 | case 0x4d1: | ||
409 | data = elcr_ioport_read(&s->pics[addr & 1], addr); | ||
410 | break; | ||
411 | } | ||
412 | *(unsigned char *)val = data; | ||
413 | } | ||
414 | |||
415 | /* | ||
416 | * callback when PIC0 irq status changed | ||
417 | */ | ||
418 | static void pic_irq_request(void *opaque, int level) | ||
419 | { | ||
420 | struct kvm *kvm = opaque; | ||
421 | struct kvm_vcpu *vcpu = kvm->vcpus[0]; | ||
422 | |||
423 | pic_irqchip(kvm)->output = level; | ||
424 | if (vcpu) | ||
425 | kvm_vcpu_kick(vcpu); | ||
426 | } | ||
427 | |||
428 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) | ||
429 | { | ||
430 | struct kvm_pic *s; | ||
431 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | ||
432 | if (!s) | ||
433 | return NULL; | ||
434 | s->pics[0].elcr_mask = 0xf8; | ||
435 | s->pics[1].elcr_mask = 0xde; | ||
436 | s->irq_request = pic_irq_request; | ||
437 | s->irq_request_opaque = kvm; | ||
438 | s->pics[0].pics_state = s; | ||
439 | s->pics[1].pics_state = s; | ||
440 | |||
441 | /* | ||
442 | * Initialize PIO device | ||
443 | */ | ||
444 | s->dev.read = picdev_read; | ||
445 | s->dev.write = picdev_write; | ||
446 | s->dev.in_range = picdev_in_range; | ||
447 | s->dev.private = s; | ||
448 | kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev); | ||
449 | return s; | ||
450 | } | ||
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c new file mode 100644 index 000000000000..c7992e667fdb --- /dev/null +++ b/drivers/kvm/ioapic.c | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001 MandrakeSoft S.A. | ||
3 | * | ||
4 | * MandrakeSoft S.A. | ||
5 | * 43, rue d'Aboukir | ||
6 | * 75002 Paris - France | ||
7 | * http://www.linux-mandrake.com/ | ||
8 | * http://www.mandrakesoft.com/ | ||
9 | * | ||
10 | * This library is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU Lesser General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This library is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public | ||
21 | * License along with this library; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | * | ||
24 | * Yunhong Jiang <yunhong.jiang@intel.com> | ||
25 | * Yaozu (Eddie) Dong <eddie.dong@intel.com> | ||
26 | * Based on Xen 3.1 code. | ||
27 | */ | ||
28 | |||
29 | #include "kvm.h" | ||
30 | #include <linux/kvm.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/highmem.h> | ||
33 | #include <linux/smp.h> | ||
34 | #include <linux/hrtimer.h> | ||
35 | #include <linux/io.h> | ||
36 | #include <asm/processor.h> | ||
37 | #include <asm/msr.h> | ||
38 | #include <asm/page.h> | ||
39 | #include <asm/current.h> | ||
40 | #include <asm/apicdef.h> | ||
41 | #include <asm/io_apic.h> | ||
42 | #include "irq.h" | ||
43 | /* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ | ||
44 | #define ioapic_debug(fmt, arg...) | ||
45 | static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq); | ||
46 | |||
47 | static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, | ||
48 | unsigned long addr, | ||
49 | unsigned long length) | ||
50 | { | ||
51 | unsigned long result = 0; | ||
52 | |||
53 | switch (ioapic->ioregsel) { | ||
54 | case IOAPIC_REG_VERSION: | ||
55 | result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) | ||
56 | | (IOAPIC_VERSION_ID & 0xff)); | ||
57 | break; | ||
58 | |||
59 | case IOAPIC_REG_APIC_ID: | ||
60 | case IOAPIC_REG_ARB_ID: | ||
61 | result = ((ioapic->id & 0xf) << 24); | ||
62 | break; | ||
63 | |||
64 | default: | ||
65 | { | ||
66 | u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; | ||
67 | u64 redir_content; | ||
68 | |||
69 | ASSERT(redir_index < IOAPIC_NUM_PINS); | ||
70 | |||
71 | redir_content = ioapic->redirtbl[redir_index].bits; | ||
72 | result = (ioapic->ioregsel & 0x1) ? | ||
73 | (redir_content >> 32) & 0xffffffff : | ||
74 | redir_content & 0xffffffff; | ||
75 | break; | ||
76 | } | ||
77 | } | ||
78 | |||
79 | return result; | ||
80 | } | ||
81 | |||
82 | static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) | ||
83 | { | ||
84 | union ioapic_redir_entry *pent; | ||
85 | |||
86 | pent = &ioapic->redirtbl[idx]; | ||
87 | |||
88 | if (!pent->fields.mask) { | ||
89 | ioapic_deliver(ioapic, idx); | ||
90 | if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) | ||
91 | pent->fields.remote_irr = 1; | ||
92 | } | ||
93 | if (!pent->fields.trig_mode) | ||
94 | ioapic->irr &= ~(1 << idx); | ||
95 | } | ||
96 | |||
97 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | ||
98 | { | ||
99 | unsigned index; | ||
100 | |||
101 | switch (ioapic->ioregsel) { | ||
102 | case IOAPIC_REG_VERSION: | ||
103 | /* Writes are ignored. */ | ||
104 | break; | ||
105 | |||
106 | case IOAPIC_REG_APIC_ID: | ||
107 | ioapic->id = (val >> 24) & 0xf; | ||
108 | break; | ||
109 | |||
110 | case IOAPIC_REG_ARB_ID: | ||
111 | break; | ||
112 | |||
113 | default: | ||
114 | index = (ioapic->ioregsel - 0x10) >> 1; | ||
115 | |||
116 | ioapic_debug("change redir index %x val %x", index, val); | ||
117 | if (index >= IOAPIC_NUM_PINS) | ||
118 | return; | ||
119 | if (ioapic->ioregsel & 1) { | ||
120 | ioapic->redirtbl[index].bits &= 0xffffffff; | ||
121 | ioapic->redirtbl[index].bits |= (u64) val << 32; | ||
122 | } else { | ||
123 | ioapic->redirtbl[index].bits &= ~0xffffffffULL; | ||
124 | ioapic->redirtbl[index].bits |= (u32) val; | ||
125 | ioapic->redirtbl[index].fields.remote_irr = 0; | ||
126 | } | ||
127 | if (ioapic->irr & (1 << index)) | ||
128 | ioapic_service(ioapic, index); | ||
129 | break; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | static void ioapic_inj_irq(struct kvm_ioapic *ioapic, | ||
134 | struct kvm_lapic *target, | ||
135 | u8 vector, u8 trig_mode, u8 delivery_mode) | ||
136 | { | ||
137 | ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode, | ||
138 | delivery_mode); | ||
139 | |||
140 | ASSERT((delivery_mode == dest_Fixed) || | ||
141 | (delivery_mode == dest_LowestPrio)); | ||
142 | |||
143 | kvm_apic_set_irq(target, vector, trig_mode); | ||
144 | } | ||
145 | |||
146 | static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, | ||
147 | u8 dest_mode) | ||
148 | { | ||
149 | u32 mask = 0; | ||
150 | int i; | ||
151 | struct kvm *kvm = ioapic->kvm; | ||
152 | struct kvm_vcpu *vcpu; | ||
153 | |||
154 | ioapic_debug("dest %d dest_mode %d", dest, dest_mode); | ||
155 | |||
156 | if (dest_mode == 0) { /* Physical mode. */ | ||
157 | if (dest == 0xFF) { /* Broadcast. */ | ||
158 | for (i = 0; i < KVM_MAX_VCPUS; ++i) | ||
159 | if (kvm->vcpus[i] && kvm->vcpus[i]->apic) | ||
160 | mask |= 1 << i; | ||
161 | return mask; | ||
162 | } | ||
163 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
164 | vcpu = kvm->vcpus[i]; | ||
165 | if (!vcpu) | ||
166 | continue; | ||
167 | if (kvm_apic_match_physical_addr(vcpu->apic, dest)) { | ||
168 | if (vcpu->apic) | ||
169 | mask = 1 << i; | ||
170 | break; | ||
171 | } | ||
172 | } | ||
173 | } else if (dest != 0) /* Logical mode, MDA non-zero. */ | ||
174 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
175 | vcpu = kvm->vcpus[i]; | ||
176 | if (!vcpu) | ||
177 | continue; | ||
178 | if (vcpu->apic && | ||
179 | kvm_apic_match_logical_addr(vcpu->apic, dest)) | ||
180 | mask |= 1 << vcpu->vcpu_id; | ||
181 | } | ||
182 | ioapic_debug("mask %x", mask); | ||
183 | return mask; | ||
184 | } | ||
185 | |||
186 | static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | ||
187 | { | ||
188 | u8 dest = ioapic->redirtbl[irq].fields.dest_id; | ||
189 | u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; | ||
190 | u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode; | ||
191 | u8 vector = ioapic->redirtbl[irq].fields.vector; | ||
192 | u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; | ||
193 | u32 deliver_bitmask; | ||
194 | struct kvm_lapic *target; | ||
195 | struct kvm_vcpu *vcpu; | ||
196 | int vcpu_id; | ||
197 | |||
198 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " | ||
199 | "vector=%x trig_mode=%x", | ||
200 | dest, dest_mode, delivery_mode, vector, trig_mode); | ||
201 | |||
202 | deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); | ||
203 | if (!deliver_bitmask) { | ||
204 | ioapic_debug("no target on destination"); | ||
205 | return; | ||
206 | } | ||
207 | |||
208 | switch (delivery_mode) { | ||
209 | case dest_LowestPrio: | ||
210 | target = | ||
211 | kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask); | ||
212 | if (target != NULL) | ||
213 | ioapic_inj_irq(ioapic, target, vector, | ||
214 | trig_mode, delivery_mode); | ||
215 | else | ||
216 | ioapic_debug("null round robin: " | ||
217 | "mask=%x vector=%x delivery_mode=%x", | ||
218 | deliver_bitmask, vector, dest_LowestPrio); | ||
219 | break; | ||
220 | case dest_Fixed: | ||
221 | for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { | ||
222 | if (!(deliver_bitmask & (1 << vcpu_id))) | ||
223 | continue; | ||
224 | deliver_bitmask &= ~(1 << vcpu_id); | ||
225 | vcpu = ioapic->kvm->vcpus[vcpu_id]; | ||
226 | if (vcpu) { | ||
227 | target = vcpu->apic; | ||
228 | ioapic_inj_irq(ioapic, target, vector, | ||
229 | trig_mode, delivery_mode); | ||
230 | } | ||
231 | } | ||
232 | break; | ||
233 | |||
234 | /* TODO: NMI */ | ||
235 | default: | ||
236 | printk(KERN_WARNING "Unsupported delivery mode %d\n", | ||
237 | delivery_mode); | ||
238 | break; | ||
239 | } | ||
240 | } | ||
241 | |||
242 | void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | ||
243 | { | ||
244 | u32 old_irr = ioapic->irr; | ||
245 | u32 mask = 1 << irq; | ||
246 | union ioapic_redir_entry entry; | ||
247 | |||
248 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | ||
249 | entry = ioapic->redirtbl[irq]; | ||
250 | level ^= entry.fields.polarity; | ||
251 | if (!level) | ||
252 | ioapic->irr &= ~mask; | ||
253 | else { | ||
254 | ioapic->irr |= mask; | ||
255 | if ((!entry.fields.trig_mode && old_irr != ioapic->irr) | ||
256 | || !entry.fields.remote_irr) | ||
257 | ioapic_service(ioapic, irq); | ||
258 | } | ||
259 | } | ||
260 | } | ||
261 | |||
262 | static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) | ||
263 | { | ||
264 | int i; | ||
265 | |||
266 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | ||
267 | if (ioapic->redirtbl[i].fields.vector == vector) | ||
268 | return i; | ||
269 | return -1; | ||
270 | } | ||
271 | |||
272 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) | ||
273 | { | ||
274 | struct kvm_ioapic *ioapic = kvm->vioapic; | ||
275 | union ioapic_redir_entry *ent; | ||
276 | int gsi; | ||
277 | |||
278 | gsi = get_eoi_gsi(ioapic, vector); | ||
279 | if (gsi == -1) { | ||
280 | printk(KERN_WARNING "Can't find redir item for %d EOI\n", | ||
281 | vector); | ||
282 | return; | ||
283 | } | ||
284 | |||
285 | ent = &ioapic->redirtbl[gsi]; | ||
286 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | ||
287 | |||
288 | ent->fields.remote_irr = 0; | ||
289 | if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) | ||
290 | ioapic_deliver(ioapic, gsi); | ||
291 | } | ||
292 | |||
293 | static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) | ||
294 | { | ||
295 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | ||
296 | |||
297 | return ((addr >= ioapic->base_address && | ||
298 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); | ||
299 | } | ||
300 | |||
301 | static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | ||
302 | void *val) | ||
303 | { | ||
304 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | ||
305 | u32 result; | ||
306 | |||
307 | ioapic_debug("addr %lx", (unsigned long)addr); | ||
308 | ASSERT(!(addr & 0xf)); /* check alignment */ | ||
309 | |||
310 | addr &= 0xff; | ||
311 | switch (addr) { | ||
312 | case IOAPIC_REG_SELECT: | ||
313 | result = ioapic->ioregsel; | ||
314 | break; | ||
315 | |||
316 | case IOAPIC_REG_WINDOW: | ||
317 | result = ioapic_read_indirect(ioapic, addr, len); | ||
318 | break; | ||
319 | |||
320 | default: | ||
321 | result = 0; | ||
322 | break; | ||
323 | } | ||
324 | switch (len) { | ||
325 | case 8: | ||
326 | *(u64 *) val = result; | ||
327 | break; | ||
328 | case 1: | ||
329 | case 2: | ||
330 | case 4: | ||
331 | memcpy(val, (char *)&result, len); | ||
332 | break; | ||
333 | default: | ||
334 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); | ||
335 | } | ||
336 | } | ||
337 | |||
338 | static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | ||
339 | const void *val) | ||
340 | { | ||
341 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | ||
342 | u32 data; | ||
343 | |||
344 | ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n", | ||
345 | addr, len, val); | ||
346 | ASSERT(!(addr & 0xf)); /* check alignment */ | ||
347 | if (len == 4 || len == 8) | ||
348 | data = *(u32 *) val; | ||
349 | else { | ||
350 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | ||
351 | return; | ||
352 | } | ||
353 | |||
354 | addr &= 0xff; | ||
355 | switch (addr) { | ||
356 | case IOAPIC_REG_SELECT: | ||
357 | ioapic->ioregsel = data; | ||
358 | break; | ||
359 | |||
360 | case IOAPIC_REG_WINDOW: | ||
361 | ioapic_write_indirect(ioapic, data); | ||
362 | break; | ||
363 | |||
364 | default: | ||
365 | break; | ||
366 | } | ||
367 | } | ||
368 | |||
369 | int kvm_ioapic_init(struct kvm *kvm) | ||
370 | { | ||
371 | struct kvm_ioapic *ioapic; | ||
372 | int i; | ||
373 | |||
374 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | ||
375 | if (!ioapic) | ||
376 | return -ENOMEM; | ||
377 | kvm->vioapic = ioapic; | ||
378 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | ||
379 | ioapic->redirtbl[i].fields.mask = 1; | ||
380 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; | ||
381 | ioapic->dev.read = ioapic_mmio_read; | ||
382 | ioapic->dev.write = ioapic_mmio_write; | ||
383 | ioapic->dev.in_range = ioapic_in_range; | ||
384 | ioapic->dev.private = ioapic; | ||
385 | ioapic->kvm = kvm; | ||
386 | kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); | ||
387 | return 0; | ||
388 | } | ||
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c new file mode 100644 index 000000000000..7628c7ff628f --- /dev/null +++ b/drivers/kvm/irq.c | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * irq.c: API for in kernel interrupt controller | ||
3 | * Copyright (c) 2007, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
16 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
17 | * Authors: | ||
18 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | |||
24 | #include "kvm.h" | ||
25 | #include "irq.h" | ||
26 | |||
27 | /* | ||
28 | * check if there is pending interrupt without | ||
29 | * intack. | ||
30 | */ | ||
31 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | ||
32 | { | ||
33 | struct kvm_pic *s; | ||
34 | |||
35 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ | ||
36 | if (kvm_apic_accept_pic_intr(v)) { | ||
37 | s = pic_irqchip(v->kvm); /* PIC */ | ||
38 | return s->output; | ||
39 | } else | ||
40 | return 0; | ||
41 | } | ||
42 | return 1; | ||
43 | } | ||
44 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); | ||
45 | |||
46 | /* | ||
47 | * Read pending interrupt vector and intack. | ||
48 | */ | ||
49 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | ||
50 | { | ||
51 | struct kvm_pic *s; | ||
52 | int vector; | ||
53 | |||
54 | vector = kvm_get_apic_interrupt(v); /* APIC */ | ||
55 | if (vector == -1) { | ||
56 | if (kvm_apic_accept_pic_intr(v)) { | ||
57 | s = pic_irqchip(v->kvm); | ||
58 | s->output = 0; /* PIC */ | ||
59 | vector = kvm_pic_read_irq(s); | ||
60 | } | ||
61 | } | ||
62 | return vector; | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | ||
65 | |||
66 | static void vcpu_kick_intr(void *info) | ||
67 | { | ||
68 | #ifdef DEBUG | ||
69 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; | ||
70 | printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); | ||
71 | #endif | ||
72 | } | ||
73 | |||
74 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | ||
75 | { | ||
76 | int ipi_pcpu = vcpu->cpu; | ||
77 | |||
78 | if (waitqueue_active(&vcpu->wq)) { | ||
79 | wake_up_interruptible(&vcpu->wq); | ||
80 | ++vcpu->stat.halt_wakeup; | ||
81 | } | ||
82 | if (vcpu->guest_mode) | ||
83 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); | ||
84 | } | ||
85 | |||
86 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | ||
87 | { | ||
88 | kvm_inject_apic_timer_irqs(vcpu); | ||
89 | /* TODO: PIT, RTC etc. */ | ||
90 | } | ||
91 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | ||
92 | |||
93 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | ||
94 | { | ||
95 | kvm_apic_timer_intr_post(vcpu, vec); | ||
96 | /* TODO: PIT, RTC etc. */ | ||
97 | } | ||
98 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); | ||
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h new file mode 100644 index 000000000000..11fc014e2b30 --- /dev/null +++ b/drivers/kvm/irq.h | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * irq.h: in kernel interrupt controller related definitions | ||
3 | * Copyright (c) 2007, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
16 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
17 | * Authors: | ||
18 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #ifndef __IRQ_H | ||
23 | #define __IRQ_H | ||
24 | |||
25 | #include "kvm.h" | ||
26 | |||
27 | typedef void irq_request_func(void *opaque, int level); | ||
28 | |||
29 | struct kvm_kpic_state { | ||
30 | u8 last_irr; /* edge detection */ | ||
31 | u8 irr; /* interrupt request register */ | ||
32 | u8 imr; /* interrupt mask register */ | ||
33 | u8 isr; /* interrupt service register */ | ||
34 | u8 priority_add; /* highest irq priority */ | ||
35 | u8 irq_base; | ||
36 | u8 read_reg_select; | ||
37 | u8 poll; | ||
38 | u8 special_mask; | ||
39 | u8 init_state; | ||
40 | u8 auto_eoi; | ||
41 | u8 rotate_on_auto_eoi; | ||
42 | u8 special_fully_nested_mode; | ||
43 | u8 init4; /* true if 4 byte init */ | ||
44 | u8 elcr; /* PIIX edge/trigger selection */ | ||
45 | u8 elcr_mask; | ||
46 | struct kvm_pic *pics_state; | ||
47 | }; | ||
48 | |||
49 | struct kvm_pic { | ||
50 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | ||
51 | irq_request_func *irq_request; | ||
52 | void *irq_request_opaque; | ||
53 | int output; /* intr from master PIC */ | ||
54 | struct kvm_io_device dev; | ||
55 | }; | ||
56 | |||
57 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | ||
58 | void kvm_pic_set_irq(void *opaque, int irq, int level); | ||
59 | int kvm_pic_read_irq(struct kvm_pic *s); | ||
60 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | ||
61 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v); | ||
62 | void kvm_pic_update_irq(struct kvm_pic *s); | ||
63 | |||
64 | #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS | ||
65 | #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ | ||
66 | #define IOAPIC_EDGE_TRIG 0 | ||
67 | #define IOAPIC_LEVEL_TRIG 1 | ||
68 | |||
69 | #define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 | ||
70 | #define IOAPIC_MEM_LENGTH 0x100 | ||
71 | |||
72 | /* Direct registers. */ | ||
73 | #define IOAPIC_REG_SELECT 0x00 | ||
74 | #define IOAPIC_REG_WINDOW 0x10 | ||
75 | #define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */ | ||
76 | |||
77 | /* Indirect registers. */ | ||
78 | #define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ | ||
79 | #define IOAPIC_REG_VERSION 0x01 | ||
80 | #define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ | ||
81 | |||
82 | struct kvm_ioapic { | ||
83 | u64 base_address; | ||
84 | u32 ioregsel; | ||
85 | u32 id; | ||
86 | u32 irr; | ||
87 | u32 pad; | ||
88 | union ioapic_redir_entry { | ||
89 | u64 bits; | ||
90 | struct { | ||
91 | u8 vector; | ||
92 | u8 delivery_mode:3; | ||
93 | u8 dest_mode:1; | ||
94 | u8 delivery_status:1; | ||
95 | u8 polarity:1; | ||
96 | u8 remote_irr:1; | ||
97 | u8 trig_mode:1; | ||
98 | u8 mask:1; | ||
99 | u8 reserve:7; | ||
100 | u8 reserved[4]; | ||
101 | u8 dest_id; | ||
102 | } fields; | ||
103 | } redirtbl[IOAPIC_NUM_PINS]; | ||
104 | struct kvm_io_device dev; | ||
105 | struct kvm *kvm; | ||
106 | }; | ||
107 | |||
108 | struct kvm_lapic { | ||
109 | unsigned long base_address; | ||
110 | struct kvm_io_device dev; | ||
111 | struct { | ||
112 | atomic_t pending; | ||
113 | s64 period; /* unit: ns */ | ||
114 | u32 divide_count; | ||
115 | ktime_t last_update; | ||
116 | struct hrtimer dev; | ||
117 | } timer; | ||
118 | struct kvm_vcpu *vcpu; | ||
119 | struct page *regs_page; | ||
120 | void *regs; | ||
121 | }; | ||
122 | |||
123 | #ifdef DEBUG | ||
124 | #define ASSERT(x) \ | ||
125 | do { \ | ||
126 | if (!(x)) { \ | ||
127 | printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ | ||
128 | __FILE__, __LINE__, #x); \ | ||
129 | BUG(); \ | ||
130 | } \ | ||
131 | } while (0) | ||
132 | #else | ||
133 | #define ASSERT(x) do { } while (0) | ||
134 | #endif | ||
135 | |||
136 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | ||
137 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); | ||
138 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); | ||
139 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | ||
140 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | ||
141 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | ||
142 | void kvm_free_apic(struct kvm_lapic *apic); | ||
143 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | ||
144 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | ||
145 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | ||
146 | struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, | ||
147 | unsigned long bitmap); | ||
148 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | ||
149 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | ||
150 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | ||
151 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); | ||
152 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | ||
153 | int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig); | ||
154 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); | ||
155 | int kvm_ioapic_init(struct kvm *kvm); | ||
156 | void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | ||
157 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu); | ||
158 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | ||
159 | void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | ||
160 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | ||
161 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | ||
162 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | ||
163 | void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | ||
164 | |||
165 | #endif | ||
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 336be86c6f5a..ad0813843adc 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h | |||
@@ -13,60 +13,38 @@ | |||
13 | #include <linux/signal.h> | 13 | #include <linux/signal.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/preempt.h> | ||
16 | #include <asm/signal.h> | 17 | #include <asm/signal.h> |
17 | 18 | ||
18 | #include "vmx.h" | ||
19 | #include <linux/kvm.h> | 19 | #include <linux/kvm.h> |
20 | #include <linux/kvm_para.h> | 20 | #include <linux/kvm_para.h> |
21 | 21 | ||
22 | #define CR0_PE_MASK (1ULL << 0) | 22 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) |
23 | #define CR0_MP_MASK (1ULL << 1) | 23 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) |
24 | #define CR0_TS_MASK (1ULL << 3) | 24 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL) |
25 | #define CR0_NE_MASK (1ULL << 5) | ||
26 | #define CR0_WP_MASK (1ULL << 16) | ||
27 | #define CR0_NW_MASK (1ULL << 29) | ||
28 | #define CR0_CD_MASK (1ULL << 30) | ||
29 | #define CR0_PG_MASK (1ULL << 31) | ||
30 | |||
31 | #define CR3_WPT_MASK (1ULL << 3) | ||
32 | #define CR3_PCD_MASK (1ULL << 4) | ||
33 | |||
34 | #define CR3_RESEVED_BITS 0x07ULL | ||
35 | #define CR3_L_MODE_RESEVED_BITS (~((1ULL << 40) - 1) | 0x0fe7ULL) | ||
36 | #define CR3_FLAGS_MASK ((1ULL << 5) - 1) | ||
37 | |||
38 | #define CR4_VME_MASK (1ULL << 0) | ||
39 | #define CR4_PSE_MASK (1ULL << 4) | ||
40 | #define CR4_PAE_MASK (1ULL << 5) | ||
41 | #define CR4_PGE_MASK (1ULL << 7) | ||
42 | #define CR4_VMXE_MASK (1ULL << 13) | ||
43 | 25 | ||
44 | #define KVM_GUEST_CR0_MASK \ | 26 | #define KVM_GUEST_CR0_MASK \ |
45 | (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ | 27 | (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \ |
46 | | CR0_NW_MASK | CR0_CD_MASK) | 28 | | X86_CR0_NW | X86_CR0_CD) |
47 | #define KVM_VM_CR0_ALWAYS_ON \ | 29 | #define KVM_VM_CR0_ALWAYS_ON \ |
48 | (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \ | 30 | (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \ |
49 | | CR0_MP_MASK) | 31 | | X86_CR0_MP) |
50 | #define KVM_GUEST_CR4_MASK \ | 32 | #define KVM_GUEST_CR4_MASK \ |
51 | (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) | 33 | (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) |
52 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) | 34 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) |
53 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK | CR4_VME_MASK) | 35 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) |
54 | 36 | ||
55 | #define INVALID_PAGE (~(hpa_t)0) | 37 | #define INVALID_PAGE (~(hpa_t)0) |
56 | #define UNMAPPED_GVA (~(gpa_t)0) | 38 | #define UNMAPPED_GVA (~(gpa_t)0) |
57 | 39 | ||
58 | #define KVM_MAX_VCPUS 4 | 40 | #define KVM_MAX_VCPUS 4 |
59 | #define KVM_ALIAS_SLOTS 4 | 41 | #define KVM_ALIAS_SLOTS 4 |
60 | #define KVM_MEMORY_SLOTS 4 | 42 | #define KVM_MEMORY_SLOTS 8 |
61 | #define KVM_NUM_MMU_PAGES 1024 | 43 | #define KVM_NUM_MMU_PAGES 1024 |
62 | #define KVM_MIN_FREE_MMU_PAGES 5 | 44 | #define KVM_MIN_FREE_MMU_PAGES 5 |
63 | #define KVM_REFILL_PAGES 25 | 45 | #define KVM_REFILL_PAGES 25 |
64 | #define KVM_MAX_CPUID_ENTRIES 40 | 46 | #define KVM_MAX_CPUID_ENTRIES 40 |
65 | 47 | ||
66 | #define FX_IMAGE_SIZE 512 | ||
67 | #define FX_IMAGE_ALIGN 16 | ||
68 | #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) | ||
69 | |||
70 | #define DE_VECTOR 0 | 48 | #define DE_VECTOR 0 |
71 | #define NM_VECTOR 7 | 49 | #define NM_VECTOR 7 |
72 | #define DF_VECTOR 8 | 50 | #define DF_VECTOR 8 |
@@ -158,15 +136,8 @@ struct kvm_mmu_page { | |||
158 | }; | 136 | }; |
159 | }; | 137 | }; |
160 | 138 | ||
161 | struct vmcs { | ||
162 | u32 revision_id; | ||
163 | u32 abort; | ||
164 | char data[0]; | ||
165 | }; | ||
166 | |||
167 | #define vmx_msr_entry kvm_msr_entry | ||
168 | |||
169 | struct kvm_vcpu; | 139 | struct kvm_vcpu; |
140 | extern struct kmem_cache *kvm_vcpu_cache; | ||
170 | 141 | ||
171 | /* | 142 | /* |
172 | * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level | 143 | * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level |
@@ -260,6 +231,7 @@ struct kvm_stat { | |||
260 | u32 signal_exits; | 231 | u32 signal_exits; |
261 | u32 irq_window_exits; | 232 | u32 irq_window_exits; |
262 | u32 halt_exits; | 233 | u32 halt_exits; |
234 | u32 halt_wakeup; | ||
263 | u32 request_irq_exits; | 235 | u32 request_irq_exits; |
264 | u32 irq_exits; | 236 | u32 irq_exits; |
265 | u32 light_exits; | 237 | u32 light_exits; |
@@ -328,21 +300,17 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, | |||
328 | 300 | ||
329 | struct kvm_vcpu { | 301 | struct kvm_vcpu { |
330 | struct kvm *kvm; | 302 | struct kvm *kvm; |
331 | union { | 303 | struct preempt_notifier preempt_notifier; |
332 | struct vmcs *vmcs; | 304 | int vcpu_id; |
333 | struct vcpu_svm *svm; | ||
334 | }; | ||
335 | struct mutex mutex; | 305 | struct mutex mutex; |
336 | int cpu; | 306 | int cpu; |
337 | int launched; | ||
338 | u64 host_tsc; | 307 | u64 host_tsc; |
339 | struct kvm_run *run; | 308 | struct kvm_run *run; |
340 | int interrupt_window_open; | 309 | int interrupt_window_open; |
341 | int guest_mode; | 310 | int guest_mode; |
342 | unsigned long requests; | 311 | unsigned long requests; |
343 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ | 312 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ |
344 | #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) | 313 | DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); |
345 | unsigned long irq_pending[NR_IRQ_WORDS]; | ||
346 | unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ | 314 | unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ |
347 | unsigned long rip; /* needs vcpu_load_rsp_rip() */ | 315 | unsigned long rip; /* needs vcpu_load_rsp_rip() */ |
348 | 316 | ||
@@ -357,15 +325,15 @@ struct kvm_vcpu { | |||
357 | u64 pdptrs[4]; /* pae */ | 325 | u64 pdptrs[4]; /* pae */ |
358 | u64 shadow_efer; | 326 | u64 shadow_efer; |
359 | u64 apic_base; | 327 | u64 apic_base; |
328 | struct kvm_lapic *apic; /* kernel irqchip context */ | ||
329 | #define VCPU_MP_STATE_RUNNABLE 0 | ||
330 | #define VCPU_MP_STATE_UNINITIALIZED 1 | ||
331 | #define VCPU_MP_STATE_INIT_RECEIVED 2 | ||
332 | #define VCPU_MP_STATE_SIPI_RECEIVED 3 | ||
333 | #define VCPU_MP_STATE_HALTED 4 | ||
334 | int mp_state; | ||
335 | int sipi_vector; | ||
360 | u64 ia32_misc_enable_msr; | 336 | u64 ia32_misc_enable_msr; |
361 | int nmsrs; | ||
362 | int save_nmsrs; | ||
363 | int msr_offset_efer; | ||
364 | #ifdef CONFIG_X86_64 | ||
365 | int msr_offset_kernel_gs_base; | ||
366 | #endif | ||
367 | struct vmx_msr_entry *guest_msrs; | ||
368 | struct vmx_msr_entry *host_msrs; | ||
369 | 337 | ||
370 | struct kvm_mmu mmu; | 338 | struct kvm_mmu mmu; |
371 | 339 | ||
@@ -379,16 +347,10 @@ struct kvm_vcpu { | |||
379 | 347 | ||
380 | struct kvm_guest_debug guest_debug; | 348 | struct kvm_guest_debug guest_debug; |
381 | 349 | ||
382 | char fx_buf[FX_BUF_SIZE]; | 350 | struct i387_fxsave_struct host_fx_image; |
383 | char *host_fx_image; | 351 | struct i387_fxsave_struct guest_fx_image; |
384 | char *guest_fx_image; | ||
385 | int fpu_active; | 352 | int fpu_active; |
386 | int guest_fpu_loaded; | 353 | int guest_fpu_loaded; |
387 | struct vmx_host_state { | ||
388 | int loaded; | ||
389 | u16 fs_sel, gs_sel, ldt_sel; | ||
390 | int fs_gs_ldt_reload_needed; | ||
391 | } vmx_host_state; | ||
392 | 354 | ||
393 | int mmio_needed; | 355 | int mmio_needed; |
394 | int mmio_read_completed; | 356 | int mmio_read_completed; |
@@ -399,6 +361,7 @@ struct kvm_vcpu { | |||
399 | gva_t mmio_fault_cr2; | 361 | gva_t mmio_fault_cr2; |
400 | struct kvm_pio_request pio; | 362 | struct kvm_pio_request pio; |
401 | void *pio_data; | 363 | void *pio_data; |
364 | wait_queue_head_t wq; | ||
402 | 365 | ||
403 | int sigset_active; | 366 | int sigset_active; |
404 | sigset_t sigset; | 367 | sigset_t sigset; |
@@ -436,7 +399,7 @@ struct kvm_memory_slot { | |||
436 | }; | 399 | }; |
437 | 400 | ||
438 | struct kvm { | 401 | struct kvm { |
439 | spinlock_t lock; /* protects everything except vcpus */ | 402 | struct mutex lock; /* protects everything except vcpus */ |
440 | int naliases; | 403 | int naliases; |
441 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | 404 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; |
442 | int nmemslots; | 405 | int nmemslots; |
@@ -447,39 +410,59 @@ struct kvm { | |||
447 | struct list_head active_mmu_pages; | 410 | struct list_head active_mmu_pages; |
448 | int n_free_mmu_pages; | 411 | int n_free_mmu_pages; |
449 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 412 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
450 | int nvcpus; | 413 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; |
451 | struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; | ||
452 | int memory_config_version; | ||
453 | int busy; | ||
454 | unsigned long rmap_overflow; | 414 | unsigned long rmap_overflow; |
455 | struct list_head vm_list; | 415 | struct list_head vm_list; |
456 | struct file *filp; | 416 | struct file *filp; |
457 | struct kvm_io_bus mmio_bus; | 417 | struct kvm_io_bus mmio_bus; |
458 | struct kvm_io_bus pio_bus; | 418 | struct kvm_io_bus pio_bus; |
419 | struct kvm_pic *vpic; | ||
420 | struct kvm_ioapic *vioapic; | ||
421 | int round_robin_prev_vcpu; | ||
459 | }; | 422 | }; |
460 | 423 | ||
424 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | ||
425 | { | ||
426 | return kvm->vpic; | ||
427 | } | ||
428 | |||
429 | static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | ||
430 | { | ||
431 | return kvm->vioapic; | ||
432 | } | ||
433 | |||
434 | static inline int irqchip_in_kernel(struct kvm *kvm) | ||
435 | { | ||
436 | return pic_irqchip(kvm) != 0; | ||
437 | } | ||
438 | |||
461 | struct descriptor_table { | 439 | struct descriptor_table { |
462 | u16 limit; | 440 | u16 limit; |
463 | unsigned long base; | 441 | unsigned long base; |
464 | } __attribute__((packed)); | 442 | } __attribute__((packed)); |
465 | 443 | ||
466 | struct kvm_arch_ops { | 444 | struct kvm_x86_ops { |
467 | int (*cpu_has_kvm_support)(void); /* __init */ | 445 | int (*cpu_has_kvm_support)(void); /* __init */ |
468 | int (*disabled_by_bios)(void); /* __init */ | 446 | int (*disabled_by_bios)(void); /* __init */ |
469 | void (*hardware_enable)(void *dummy); /* __init */ | 447 | void (*hardware_enable)(void *dummy); /* __init */ |
470 | void (*hardware_disable)(void *dummy); | 448 | void (*hardware_disable)(void *dummy); |
449 | void (*check_processor_compatibility)(void *rtn); | ||
471 | int (*hardware_setup)(void); /* __init */ | 450 | int (*hardware_setup)(void); /* __init */ |
472 | void (*hardware_unsetup)(void); /* __exit */ | 451 | void (*hardware_unsetup)(void); /* __exit */ |
473 | 452 | ||
474 | int (*vcpu_create)(struct kvm_vcpu *vcpu); | 453 | /* Create, but do not attach this VCPU */ |
454 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); | ||
475 | void (*vcpu_free)(struct kvm_vcpu *vcpu); | 455 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
456 | void (*vcpu_reset)(struct kvm_vcpu *vcpu); | ||
476 | 457 | ||
477 | void (*vcpu_load)(struct kvm_vcpu *vcpu); | 458 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); |
459 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | ||
478 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 460 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
479 | void (*vcpu_decache)(struct kvm_vcpu *vcpu); | 461 | void (*vcpu_decache)(struct kvm_vcpu *vcpu); |
480 | 462 | ||
481 | int (*set_guest_debug)(struct kvm_vcpu *vcpu, | 463 | int (*set_guest_debug)(struct kvm_vcpu *vcpu, |
482 | struct kvm_debug_guest *dbg); | 464 | struct kvm_debug_guest *dbg); |
465 | void (*guest_debug_pre)(struct kvm_vcpu *vcpu); | ||
483 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 466 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); |
484 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 467 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
485 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 468 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
@@ -505,27 +488,43 @@ struct kvm_arch_ops { | |||
505 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 488 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
506 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 489 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
507 | 490 | ||
508 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t addr); | ||
509 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 491 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
510 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, | 492 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, |
511 | unsigned long addr, u32 err_code); | 493 | unsigned long addr, u32 err_code); |
512 | 494 | ||
513 | void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); | 495 | void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); |
514 | 496 | ||
515 | int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); | 497 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); |
516 | int (*vcpu_setup)(struct kvm_vcpu *vcpu); | 498 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); |
517 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); | 499 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
518 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, | 500 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, |
519 | unsigned char *hypercall_addr); | 501 | unsigned char *hypercall_addr); |
502 | int (*get_irq)(struct kvm_vcpu *vcpu); | ||
503 | void (*set_irq)(struct kvm_vcpu *vcpu, int vec); | ||
504 | void (*inject_pending_irq)(struct kvm_vcpu *vcpu); | ||
505 | void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, | ||
506 | struct kvm_run *run); | ||
520 | }; | 507 | }; |
521 | 508 | ||
522 | extern struct kvm_arch_ops *kvm_arch_ops; | 509 | extern struct kvm_x86_ops *kvm_x86_ops; |
510 | |||
511 | /* The guest did something we don't support. */ | ||
512 | #define pr_unimpl(vcpu, fmt, ...) \ | ||
513 | do { \ | ||
514 | if (printk_ratelimit()) \ | ||
515 | printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ | ||
516 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ | ||
517 | } while(0) | ||
523 | 518 | ||
524 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) | 519 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) |
525 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) | 520 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) |
526 | 521 | ||
527 | int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); | 522 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); |
528 | void kvm_exit_arch(void); | 523 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); |
524 | |||
525 | int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size, | ||
526 | struct module *module); | ||
527 | void kvm_exit_x86(void); | ||
529 | 528 | ||
530 | int kvm_mmu_module_init(void); | 529 | int kvm_mmu_module_init(void); |
531 | void kvm_mmu_module_exit(void); | 530 | void kvm_mmu_module_exit(void); |
@@ -545,8 +544,6 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | |||
545 | hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); | 544 | hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); |
546 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); | 545 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); |
547 | 546 | ||
548 | void kvm_emulator_want_group7_invlpg(void); | ||
549 | |||
550 | extern hpa_t bad_page_address; | 547 | extern hpa_t bad_page_address; |
551 | 548 | ||
552 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 549 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
@@ -561,6 +558,7 @@ enum emulation_result { | |||
561 | 558 | ||
562 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, | 559 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, |
563 | unsigned long cr2, u16 error_code); | 560 | unsigned long cr2, u16 error_code); |
561 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | ||
564 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 562 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
565 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 563 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
566 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | 564 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, |
@@ -574,9 +572,11 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | |||
574 | 572 | ||
575 | struct x86_emulate_ctxt; | 573 | struct x86_emulate_ctxt; |
576 | 574 | ||
577 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 575 | int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, |
578 | int size, unsigned long count, int string, int down, | 576 | int size, unsigned port); |
579 | gva_t address, int rep, unsigned port); | 577 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, |
578 | int size, unsigned long count, int down, | ||
579 | gva_t address, int rep, unsigned port); | ||
580 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 580 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
581 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 581 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
582 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 582 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
@@ -590,34 +590,33 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | |||
590 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); | 590 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); |
591 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); | 591 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); |
592 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); | 592 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); |
593 | unsigned long get_cr8(struct kvm_vcpu *vcpu); | ||
593 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 594 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
595 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | ||
594 | 596 | ||
595 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 597 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
596 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 598 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
597 | 599 | ||
598 | void fx_init(struct kvm_vcpu *vcpu); | 600 | void fx_init(struct kvm_vcpu *vcpu); |
599 | 601 | ||
600 | void load_msrs(struct vmx_msr_entry *e, int n); | ||
601 | void save_msrs(struct vmx_msr_entry *e, int n); | ||
602 | void kvm_resched(struct kvm_vcpu *vcpu); | 602 | void kvm_resched(struct kvm_vcpu *vcpu); |
603 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | 603 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); |
604 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | 604 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); |
605 | void kvm_flush_remote_tlbs(struct kvm *kvm); | 605 | void kvm_flush_remote_tlbs(struct kvm *kvm); |
606 | 606 | ||
607 | int kvm_read_guest(struct kvm_vcpu *vcpu, | 607 | int emulator_read_std(unsigned long addr, |
608 | gva_t addr, | 608 | void *val, |
609 | unsigned long size, | 609 | unsigned int bytes, |
610 | void *dest); | 610 | struct kvm_vcpu *vcpu); |
611 | 611 | int emulator_write_emulated(unsigned long addr, | |
612 | int kvm_write_guest(struct kvm_vcpu *vcpu, | 612 | const void *val, |
613 | gva_t addr, | 613 | unsigned int bytes, |
614 | unsigned long size, | 614 | struct kvm_vcpu *vcpu); |
615 | void *data); | ||
616 | 615 | ||
617 | unsigned long segment_base(u16 selector); | 616 | unsigned long segment_base(u16 selector); |
618 | 617 | ||
619 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 618 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
620 | const u8 *old, const u8 *new, int bytes); | 619 | const u8 *new, int bytes); |
621 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 620 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
622 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 621 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
623 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 622 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
@@ -656,17 +655,17 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu) | |||
656 | 655 | ||
657 | static inline int is_pae(struct kvm_vcpu *vcpu) | 656 | static inline int is_pae(struct kvm_vcpu *vcpu) |
658 | { | 657 | { |
659 | return vcpu->cr4 & CR4_PAE_MASK; | 658 | return vcpu->cr4 & X86_CR4_PAE; |
660 | } | 659 | } |
661 | 660 | ||
662 | static inline int is_pse(struct kvm_vcpu *vcpu) | 661 | static inline int is_pse(struct kvm_vcpu *vcpu) |
663 | { | 662 | { |
664 | return vcpu->cr4 & CR4_PSE_MASK; | 663 | return vcpu->cr4 & X86_CR4_PSE; |
665 | } | 664 | } |
666 | 665 | ||
667 | static inline int is_paging(struct kvm_vcpu *vcpu) | 666 | static inline int is_paging(struct kvm_vcpu *vcpu) |
668 | { | 667 | { |
669 | return vcpu->cr0 & CR0_PG_MASK; | 668 | return vcpu->cr0 & X86_CR0_PG; |
670 | } | 669 | } |
671 | 670 | ||
672 | static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) | 671 | static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) |
@@ -746,12 +745,12 @@ static inline unsigned long read_msr(unsigned long msr) | |||
746 | } | 745 | } |
747 | #endif | 746 | #endif |
748 | 747 | ||
749 | static inline void fx_save(void *image) | 748 | static inline void fx_save(struct i387_fxsave_struct *image) |
750 | { | 749 | { |
751 | asm ("fxsave (%0)":: "r" (image)); | 750 | asm ("fxsave (%0)":: "r" (image)); |
752 | } | 751 | } |
753 | 752 | ||
754 | static inline void fx_restore(void *image) | 753 | static inline void fx_restore(struct i387_fxsave_struct *image) |
755 | { | 754 | { |
756 | asm ("fxrstor (%0)":: "r" (image)); | 755 | asm ("fxrstor (%0)":: "r" (image)); |
757 | } | 756 | } |
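
The kvm.h hunks above rename the kvm_arch_ops table to kvm_x86_ops and grow it with irq-delivery hooks (get_irq, set_irq, inject_pending_irq). The shape of that indirection — generic code calling a backend only through a table of function pointers that each backend fills in — can be sketched in a few lines of standalone C; every name below (demo_x86_ops and friends) is hypothetical and only mirrors the pattern, not the patch itself:

	#include <stdio.h>

	/* Hypothetical stand-in for the per-backend ops table. */
	struct demo_x86_ops {
		int  (*get_irq)(int vcpu_id);
		void (*set_irq)(int vcpu_id, int vec);
	};

	/* One backend (think vmx.c or svm.c) fills in the table... */
	static int  demo_get_irq(int vcpu_id)          { return vcpu_id + 32; }
	static void demo_set_irq(int vcpu_id, int vec) { printf("vcpu %d <- vector %d\n", vcpu_id, vec); }

	static struct demo_x86_ops demo_ops = {
		.get_irq = demo_get_irq,
		.set_irq = demo_set_irq,
	};

	/* ...and generic code only ever calls through the global pointer. */
	static struct demo_x86_ops *x86_ops = &demo_ops;

	int main(void)
	{
		x86_ops->set_irq(0, x86_ops->get_irq(0));
		return 0;
	}
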
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index cd0557954e50..353e58527d15 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "kvm.h" | 18 | #include "kvm.h" |
19 | #include "x86_emulate.h" | 19 | #include "x86_emulate.h" |
20 | #include "segment_descriptor.h" | 20 | #include "segment_descriptor.h" |
21 | #include "irq.h" | ||
21 | 22 | ||
22 | #include <linux/kvm.h> | 23 | #include <linux/kvm.h> |
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
@@ -37,6 +38,7 @@ | |||
37 | #include <linux/cpumask.h> | 38 | #include <linux/cpumask.h> |
38 | #include <linux/smp.h> | 39 | #include <linux/smp.h> |
39 | #include <linux/anon_inodes.h> | 40 | #include <linux/anon_inodes.h> |
41 | #include <linux/profile.h> | ||
40 | 42 | ||
41 | #include <asm/processor.h> | 43 | #include <asm/processor.h> |
42 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
@@ -52,9 +54,11 @@ static LIST_HEAD(vm_list); | |||
52 | 54 | ||
53 | static cpumask_t cpus_hardware_enabled; | 55 | static cpumask_t cpus_hardware_enabled; |
54 | 56 | ||
55 | struct kvm_arch_ops *kvm_arch_ops; | 57 | struct kvm_x86_ops *kvm_x86_ops; |
58 | struct kmem_cache *kvm_vcpu_cache; | ||
59 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | ||
56 | 60 | ||
57 | static void hardware_disable(void *ignored); | 61 | static __read_mostly struct preempt_ops kvm_preempt_ops; |
58 | 62 | ||
59 | #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) | 63 | #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) |
60 | 64 | ||
@@ -73,6 +77,7 @@ static struct kvm_stats_debugfs_item { | |||
73 | { "signal_exits", STAT_OFFSET(signal_exits) }, | 77 | { "signal_exits", STAT_OFFSET(signal_exits) }, |
74 | { "irq_window", STAT_OFFSET(irq_window_exits) }, | 78 | { "irq_window", STAT_OFFSET(irq_window_exits) }, |
75 | { "halt_exits", STAT_OFFSET(halt_exits) }, | 79 | { "halt_exits", STAT_OFFSET(halt_exits) }, |
80 | { "halt_wakeup", STAT_OFFSET(halt_wakeup) }, | ||
76 | { "request_irq", STAT_OFFSET(request_irq_exits) }, | 81 | { "request_irq", STAT_OFFSET(request_irq_exits) }, |
77 | { "irq_exits", STAT_OFFSET(irq_exits) }, | 82 | { "irq_exits", STAT_OFFSET(irq_exits) }, |
78 | { "light_exits", STAT_OFFSET(light_exits) }, | 83 | { "light_exits", STAT_OFFSET(light_exits) }, |
@@ -84,10 +89,17 @@ static struct dentry *debugfs_dir; | |||
84 | 89 | ||
85 | #define MAX_IO_MSRS 256 | 90 | #define MAX_IO_MSRS 256 |
86 | 91 | ||
87 | #define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL | 92 | #define CR0_RESERVED_BITS \ |
88 | #define LMSW_GUEST_MASK 0x0eULL | 93 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ |
89 | #define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) | 94 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ |
90 | #define CR8_RESEVED_BITS (~0x0fULL) | 95 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) |
96 | #define CR4_RESERVED_BITS \ | ||
97 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
98 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
99 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
100 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
101 | |||
102 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
91 | #define EFER_RESERVED_BITS 0xfffffffffffff2fe | 103 | #define EFER_RESERVED_BITS 0xfffffffffffff2fe |
92 | 104 | ||
93 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
@@ -139,82 +151,14 @@ static inline int valid_vcpu(int n) | |||
139 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | 151 | return likely(n >= 0 && n < KVM_MAX_VCPUS); |
140 | } | 152 | } |
141 | 153 | ||
142 | int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, | ||
143 | void *dest) | ||
144 | { | ||
145 | unsigned char *host_buf = dest; | ||
146 | unsigned long req_size = size; | ||
147 | |||
148 | while (size) { | ||
149 | hpa_t paddr; | ||
150 | unsigned now; | ||
151 | unsigned offset; | ||
152 | hva_t guest_buf; | ||
153 | |||
154 | paddr = gva_to_hpa(vcpu, addr); | ||
155 | |||
156 | if (is_error_hpa(paddr)) | ||
157 | break; | ||
158 | |||
159 | guest_buf = (hva_t)kmap_atomic( | ||
160 | pfn_to_page(paddr >> PAGE_SHIFT), | ||
161 | KM_USER0); | ||
162 | offset = addr & ~PAGE_MASK; | ||
163 | guest_buf |= offset; | ||
164 | now = min(size, PAGE_SIZE - offset); | ||
165 | memcpy(host_buf, (void*)guest_buf, now); | ||
166 | host_buf += now; | ||
167 | addr += now; | ||
168 | size -= now; | ||
169 | kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); | ||
170 | } | ||
171 | return req_size - size; | ||
172 | } | ||
173 | EXPORT_SYMBOL_GPL(kvm_read_guest); | ||
174 | |||
175 | int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, | ||
176 | void *data) | ||
177 | { | ||
178 | unsigned char *host_buf = data; | ||
179 | unsigned long req_size = size; | ||
180 | |||
181 | while (size) { | ||
182 | hpa_t paddr; | ||
183 | unsigned now; | ||
184 | unsigned offset; | ||
185 | hva_t guest_buf; | ||
186 | gfn_t gfn; | ||
187 | |||
188 | paddr = gva_to_hpa(vcpu, addr); | ||
189 | |||
190 | if (is_error_hpa(paddr)) | ||
191 | break; | ||
192 | |||
193 | gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT; | ||
194 | mark_page_dirty(vcpu->kvm, gfn); | ||
195 | guest_buf = (hva_t)kmap_atomic( | ||
196 | pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0); | ||
197 | offset = addr & ~PAGE_MASK; | ||
198 | guest_buf |= offset; | ||
199 | now = min(size, PAGE_SIZE - offset); | ||
200 | memcpy((void*)guest_buf, host_buf, now); | ||
201 | host_buf += now; | ||
202 | addr += now; | ||
203 | size -= now; | ||
204 | kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); | ||
205 | } | ||
206 | return req_size - size; | ||
207 | } | ||
208 | EXPORT_SYMBOL_GPL(kvm_write_guest); | ||
209 | |||
210 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 154 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
211 | { | 155 | { |
212 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) | 156 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) |
213 | return; | 157 | return; |
214 | 158 | ||
215 | vcpu->guest_fpu_loaded = 1; | 159 | vcpu->guest_fpu_loaded = 1; |
216 | fx_save(vcpu->host_fx_image); | 160 | fx_save(&vcpu->host_fx_image); |
217 | fx_restore(vcpu->guest_fx_image); | 161 | fx_restore(&vcpu->guest_fx_image); |
218 | } | 162 | } |
219 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | 163 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); |
220 | 164 | ||
@@ -224,8 +168,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
224 | return; | 168 | return; |
225 | 169 | ||
226 | vcpu->guest_fpu_loaded = 0; | 170 | vcpu->guest_fpu_loaded = 0; |
227 | fx_save(vcpu->guest_fx_image); | 171 | fx_save(&vcpu->guest_fx_image); |
228 | fx_restore(vcpu->host_fx_image); | 172 | fx_restore(&vcpu->host_fx_image); |
229 | } | 173 | } |
230 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | 174 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); |
231 | 175 | ||
@@ -234,13 +178,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | |||
234 | */ | 178 | */ |
235 | static void vcpu_load(struct kvm_vcpu *vcpu) | 179 | static void vcpu_load(struct kvm_vcpu *vcpu) |
236 | { | 180 | { |
181 | int cpu; | ||
182 | |||
237 | mutex_lock(&vcpu->mutex); | 183 | mutex_lock(&vcpu->mutex); |
238 | kvm_arch_ops->vcpu_load(vcpu); | 184 | cpu = get_cpu(); |
185 | preempt_notifier_register(&vcpu->preempt_notifier); | ||
186 | kvm_x86_ops->vcpu_load(vcpu, cpu); | ||
187 | put_cpu(); | ||
239 | } | 188 | } |
240 | 189 | ||
241 | static void vcpu_put(struct kvm_vcpu *vcpu) | 190 | static void vcpu_put(struct kvm_vcpu *vcpu) |
242 | { | 191 | { |
243 | kvm_arch_ops->vcpu_put(vcpu); | 192 | preempt_disable(); |
193 | kvm_x86_ops->vcpu_put(vcpu); | ||
194 | preempt_notifier_unregister(&vcpu->preempt_notifier); | ||
195 | preempt_enable(); | ||
244 | mutex_unlock(&vcpu->mutex); | 196 | mutex_unlock(&vcpu->mutex); |
245 | } | 197 | } |
246 | 198 | ||
@@ -261,8 +213,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
261 | atomic_set(&completed, 0); | 213 | atomic_set(&completed, 0); |
262 | cpus_clear(cpus); | 214 | cpus_clear(cpus); |
263 | needed = 0; | 215 | needed = 0; |
264 | for (i = 0; i < kvm->nvcpus; ++i) { | 216 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
265 | vcpu = &kvm->vcpus[i]; | 217 | vcpu = kvm->vcpus[i]; |
218 | if (!vcpu) | ||
219 | continue; | ||
266 | if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) | 220 | if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) |
267 | continue; | 221 | continue; |
268 | cpu = vcpu->cpu; | 222 | cpu = vcpu->cpu; |
@@ -286,37 +240,79 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
286 | } | 240 | } |
287 | } | 241 | } |
288 | 242 | ||
243 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | ||
244 | { | ||
245 | struct page *page; | ||
246 | int r; | ||
247 | |||
248 | mutex_init(&vcpu->mutex); | ||
249 | vcpu->cpu = -1; | ||
250 | vcpu->mmu.root_hpa = INVALID_PAGE; | ||
251 | vcpu->kvm = kvm; | ||
252 | vcpu->vcpu_id = id; | ||
253 | if (!irqchip_in_kernel(kvm) || id == 0) | ||
254 | vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; | ||
255 | else | ||
256 | vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; | ||
257 | init_waitqueue_head(&vcpu->wq); | ||
258 | |||
259 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
260 | if (!page) { | ||
261 | r = -ENOMEM; | ||
262 | goto fail; | ||
263 | } | ||
264 | vcpu->run = page_address(page); | ||
265 | |||
266 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
267 | if (!page) { | ||
268 | r = -ENOMEM; | ||
269 | goto fail_free_run; | ||
270 | } | ||
271 | vcpu->pio_data = page_address(page); | ||
272 | |||
273 | r = kvm_mmu_create(vcpu); | ||
274 | if (r < 0) | ||
275 | goto fail_free_pio_data; | ||
276 | |||
277 | return 0; | ||
278 | |||
279 | fail_free_pio_data: | ||
280 | free_page((unsigned long)vcpu->pio_data); | ||
281 | fail_free_run: | ||
282 | free_page((unsigned long)vcpu->run); | ||
283 | fail: | ||
284 | return -ENOMEM; | ||
285 | } | ||
286 | EXPORT_SYMBOL_GPL(kvm_vcpu_init); | ||
287 | |||
288 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
289 | { | ||
290 | kvm_mmu_destroy(vcpu); | ||
291 | if (vcpu->apic) | ||
292 | hrtimer_cancel(&vcpu->apic->timer.dev); | ||
293 | kvm_free_apic(vcpu->apic); | ||
294 | free_page((unsigned long)vcpu->pio_data); | ||
295 | free_page((unsigned long)vcpu->run); | ||
296 | } | ||
297 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); | ||
298 | |||
289 | static struct kvm *kvm_create_vm(void) | 299 | static struct kvm *kvm_create_vm(void) |
290 | { | 300 | { |
291 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | 301 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); |
292 | int i; | ||
293 | 302 | ||
294 | if (!kvm) | 303 | if (!kvm) |
295 | return ERR_PTR(-ENOMEM); | 304 | return ERR_PTR(-ENOMEM); |
296 | 305 | ||
297 | kvm_io_bus_init(&kvm->pio_bus); | 306 | kvm_io_bus_init(&kvm->pio_bus); |
298 | spin_lock_init(&kvm->lock); | 307 | mutex_init(&kvm->lock); |
299 | INIT_LIST_HEAD(&kvm->active_mmu_pages); | 308 | INIT_LIST_HEAD(&kvm->active_mmu_pages); |
300 | kvm_io_bus_init(&kvm->mmio_bus); | 309 | kvm_io_bus_init(&kvm->mmio_bus); |
301 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
302 | struct kvm_vcpu *vcpu = &kvm->vcpus[i]; | ||
303 | |||
304 | mutex_init(&vcpu->mutex); | ||
305 | vcpu->cpu = -1; | ||
306 | vcpu->kvm = kvm; | ||
307 | vcpu->mmu.root_hpa = INVALID_PAGE; | ||
308 | } | ||
309 | spin_lock(&kvm_lock); | 310 | spin_lock(&kvm_lock); |
310 | list_add(&kvm->vm_list, &vm_list); | 311 | list_add(&kvm->vm_list, &vm_list); |
311 | spin_unlock(&kvm_lock); | 312 | spin_unlock(&kvm_lock); |
312 | return kvm; | 313 | return kvm; |
313 | } | 314 | } |
314 | 315 | ||
315 | static int kvm_dev_open(struct inode *inode, struct file *filp) | ||
316 | { | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | /* | 316 | /* |
321 | * Free any memory in @free but not in @dont. | 317 | * Free any memory in @free but not in @dont. |
322 | */ | 318 | */ |
@@ -353,7 +349,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | |||
353 | { | 349 | { |
354 | int i; | 350 | int i; |
355 | 351 | ||
356 | for (i = 0; i < 2; ++i) | 352 | for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i) |
357 | if (vcpu->pio.guest_pages[i]) { | 353 | if (vcpu->pio.guest_pages[i]) { |
358 | __free_page(vcpu->pio.guest_pages[i]); | 354 | __free_page(vcpu->pio.guest_pages[i]); |
359 | vcpu->pio.guest_pages[i] = NULL; | 355 | vcpu->pio.guest_pages[i] = NULL; |
@@ -362,30 +358,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | |||
362 | 358 | ||
363 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 359 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
364 | { | 360 | { |
365 | if (!vcpu->vmcs) | ||
366 | return; | ||
367 | |||
368 | vcpu_load(vcpu); | 361 | vcpu_load(vcpu); |
369 | kvm_mmu_unload(vcpu); | 362 | kvm_mmu_unload(vcpu); |
370 | vcpu_put(vcpu); | 363 | vcpu_put(vcpu); |
371 | } | 364 | } |
372 | 365 | ||
373 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | ||
374 | { | ||
375 | if (!vcpu->vmcs) | ||
376 | return; | ||
377 | |||
378 | vcpu_load(vcpu); | ||
379 | kvm_mmu_destroy(vcpu); | ||
380 | vcpu_put(vcpu); | ||
381 | kvm_arch_ops->vcpu_free(vcpu); | ||
382 | free_page((unsigned long)vcpu->run); | ||
383 | vcpu->run = NULL; | ||
384 | free_page((unsigned long)vcpu->pio_data); | ||
385 | vcpu->pio_data = NULL; | ||
386 | free_pio_guest_pages(vcpu); | ||
387 | } | ||
388 | |||
389 | static void kvm_free_vcpus(struct kvm *kvm) | 366 | static void kvm_free_vcpus(struct kvm *kvm) |
390 | { | 367 | { |
391 | unsigned int i; | 368 | unsigned int i; |
@@ -394,14 +371,15 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
394 | * Unpin any mmu pages first. | 371 | * Unpin any mmu pages first. |
395 | */ | 372 | */ |
396 | for (i = 0; i < KVM_MAX_VCPUS; ++i) | 373 | for (i = 0; i < KVM_MAX_VCPUS; ++i) |
397 | kvm_unload_vcpu_mmu(&kvm->vcpus[i]); | 374 | if (kvm->vcpus[i]) |
398 | for (i = 0; i < KVM_MAX_VCPUS; ++i) | 375 | kvm_unload_vcpu_mmu(kvm->vcpus[i]); |
399 | kvm_free_vcpu(&kvm->vcpus[i]); | 376 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
400 | } | 377 | if (kvm->vcpus[i]) { |
378 | kvm_x86_ops->vcpu_free(kvm->vcpus[i]); | ||
379 | kvm->vcpus[i] = NULL; | ||
380 | } | ||
381 | } | ||
401 | 382 | ||
402 | static int kvm_dev_release(struct inode *inode, struct file *filp) | ||
403 | { | ||
404 | return 0; | ||
405 | } | 383 | } |
406 | 384 | ||
407 | static void kvm_destroy_vm(struct kvm *kvm) | 385 | static void kvm_destroy_vm(struct kvm *kvm) |
@@ -411,6 +389,8 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
411 | spin_unlock(&kvm_lock); | 389 | spin_unlock(&kvm_lock); |
412 | kvm_io_bus_destroy(&kvm->pio_bus); | 390 | kvm_io_bus_destroy(&kvm->pio_bus); |
413 | kvm_io_bus_destroy(&kvm->mmio_bus); | 391 | kvm_io_bus_destroy(&kvm->mmio_bus); |
392 | kfree(kvm->vpic); | ||
393 | kfree(kvm->vioapic); | ||
414 | kvm_free_vcpus(kvm); | 394 | kvm_free_vcpus(kvm); |
415 | kvm_free_physmem(kvm); | 395 | kvm_free_physmem(kvm); |
416 | kfree(kvm); | 396 | kfree(kvm); |
@@ -426,7 +406,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
426 | 406 | ||
427 | static void inject_gp(struct kvm_vcpu *vcpu) | 407 | static void inject_gp(struct kvm_vcpu *vcpu) |
428 | { | 408 | { |
429 | kvm_arch_ops->inject_gp(vcpu, 0); | 409 | kvm_x86_ops->inject_gp(vcpu, 0); |
430 | } | 410 | } |
431 | 411 | ||
432 | /* | 412 | /* |
@@ -437,58 +417,60 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
437 | gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; | 417 | gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; |
438 | unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; | 418 | unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; |
439 | int i; | 419 | int i; |
440 | u64 pdpte; | ||
441 | u64 *pdpt; | 420 | u64 *pdpt; |
442 | int ret; | 421 | int ret; |
443 | struct page *page; | 422 | struct page *page; |
423 | u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; | ||
444 | 424 | ||
445 | spin_lock(&vcpu->kvm->lock); | 425 | mutex_lock(&vcpu->kvm->lock); |
446 | page = gfn_to_page(vcpu->kvm, pdpt_gfn); | 426 | page = gfn_to_page(vcpu->kvm, pdpt_gfn); |
447 | /* FIXME: !page - emulate? 0xff? */ | 427 | if (!page) { |
428 | ret = 0; | ||
429 | goto out; | ||
430 | } | ||
431 | |||
448 | pdpt = kmap_atomic(page, KM_USER0); | 432 | pdpt = kmap_atomic(page, KM_USER0); |
433 | memcpy(pdpte, pdpt+offset, sizeof(pdpte)); | ||
434 | kunmap_atomic(pdpt, KM_USER0); | ||
449 | 435 | ||
450 | ret = 1; | 436 | for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { |
451 | for (i = 0; i < 4; ++i) { | 437 | if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { |
452 | pdpte = pdpt[offset + i]; | ||
453 | if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) { | ||
454 | ret = 0; | 438 | ret = 0; |
455 | goto out; | 439 | goto out; |
456 | } | 440 | } |
457 | } | 441 | } |
442 | ret = 1; | ||
458 | 443 | ||
459 | for (i = 0; i < 4; ++i) | 444 | memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs)); |
460 | vcpu->pdptrs[i] = pdpt[offset + i]; | ||
461 | |||
462 | out: | 445 | out: |
463 | kunmap_atomic(pdpt, KM_USER0); | 446 | mutex_unlock(&vcpu->kvm->lock); |
464 | spin_unlock(&vcpu->kvm->lock); | ||
465 | 447 | ||
466 | return ret; | 448 | return ret; |
467 | } | 449 | } |
468 | 450 | ||
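
In the reworked load_pdptrs() above, `offset` picks the 32-byte-aligned PDPT inside the page that cr3 points into: `(cr3 & (PAGE_SIZE-1)) >> 5` is the 32-byte block index, and the final `<< 2` rescales it to u64 entries, since each table holds four 8-byte PDPTEs. A standalone check of the same arithmetic (the cr3 value below is purely illustrative):

	#include <stdio.h>
	#include <stdint.h>

	#define DEMO_PAGE_SIZE 4096UL

	int main(void)
	{
		/* Example PAE cr3; only bits 5-11 matter for the intra-page offset. */
		uint64_t cr3 = 0x12345E0;

		/* Same expression as load_pdptrs(): 32-byte block index, scaled to u64 entries. */
		unsigned offset = ((cr3 & (DEMO_PAGE_SIZE - 1)) >> 5) << 2;

		printf("byte offset in page:      %lu\n", (unsigned long)(cr3 & (DEMO_PAGE_SIZE - 1)));
		printf("u64 index of first pdpte: %u (byte offset %u)\n", offset, offset * 8);
		return 0;
	}
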
469 | void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 451 | void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
470 | { | 452 | { |
471 | if (cr0 & CR0_RESEVED_BITS) { | 453 | if (cr0 & CR0_RESERVED_BITS) { |
472 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 454 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", |
473 | cr0, vcpu->cr0); | 455 | cr0, vcpu->cr0); |
474 | inject_gp(vcpu); | 456 | inject_gp(vcpu); |
475 | return; | 457 | return; |
476 | } | 458 | } |
477 | 459 | ||
478 | if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { | 460 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
479 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | 461 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); |
480 | inject_gp(vcpu); | 462 | inject_gp(vcpu); |
481 | return; | 463 | return; |
482 | } | 464 | } |
483 | 465 | ||
484 | if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { | 466 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { |
485 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " | 467 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " |
486 | "and a clear PE flag\n"); | 468 | "and a clear PE flag\n"); |
487 | inject_gp(vcpu); | 469 | inject_gp(vcpu); |
488 | return; | 470 | return; |
489 | } | 471 | } |
490 | 472 | ||
491 | if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { | 473 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
492 | #ifdef CONFIG_X86_64 | 474 | #ifdef CONFIG_X86_64 |
493 | if ((vcpu->shadow_efer & EFER_LME)) { | 475 | if ((vcpu->shadow_efer & EFER_LME)) { |
494 | int cs_db, cs_l; | 476 | int cs_db, cs_l; |
@@ -499,7 +481,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
499 | inject_gp(vcpu); | 481 | inject_gp(vcpu); |
500 | return; | 482 | return; |
501 | } | 483 | } |
502 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 484 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
503 | if (cs_l) { | 485 | if (cs_l) { |
504 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | 486 | printk(KERN_DEBUG "set_cr0: #GP, start paging " |
505 | "in long mode while CS.L == 1\n"); | 487 | "in long mode while CS.L == 1\n"); |
@@ -518,12 +500,12 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
518 | 500 | ||
519 | } | 501 | } |
520 | 502 | ||
521 | kvm_arch_ops->set_cr0(vcpu, cr0); | 503 | kvm_x86_ops->set_cr0(vcpu, cr0); |
522 | vcpu->cr0 = cr0; | 504 | vcpu->cr0 = cr0; |
523 | 505 | ||
524 | spin_lock(&vcpu->kvm->lock); | 506 | mutex_lock(&vcpu->kvm->lock); |
525 | kvm_mmu_reset_context(vcpu); | 507 | kvm_mmu_reset_context(vcpu); |
526 | spin_unlock(&vcpu->kvm->lock); | 508 | mutex_unlock(&vcpu->kvm->lock); |
527 | return; | 509 | return; |
528 | } | 510 | } |
529 | EXPORT_SYMBOL_GPL(set_cr0); | 511 | EXPORT_SYMBOL_GPL(set_cr0); |
@@ -536,62 +518,72 @@ EXPORT_SYMBOL_GPL(lmsw); | |||
536 | 518 | ||
537 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 519 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
538 | { | 520 | { |
539 | if (cr4 & CR4_RESEVED_BITS) { | 521 | if (cr4 & CR4_RESERVED_BITS) { |
540 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); | 522 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); |
541 | inject_gp(vcpu); | 523 | inject_gp(vcpu); |
542 | return; | 524 | return; |
543 | } | 525 | } |
544 | 526 | ||
545 | if (is_long_mode(vcpu)) { | 527 | if (is_long_mode(vcpu)) { |
546 | if (!(cr4 & CR4_PAE_MASK)) { | 528 | if (!(cr4 & X86_CR4_PAE)) { |
547 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " | 529 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " |
548 | "in long mode\n"); | 530 | "in long mode\n"); |
549 | inject_gp(vcpu); | 531 | inject_gp(vcpu); |
550 | return; | 532 | return; |
551 | } | 533 | } |
552 | } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) | 534 | } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) |
553 | && !load_pdptrs(vcpu, vcpu->cr3)) { | 535 | && !load_pdptrs(vcpu, vcpu->cr3)) { |
554 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); | 536 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); |
555 | inject_gp(vcpu); | 537 | inject_gp(vcpu); |
538 | return; | ||
556 | } | 539 | } |
557 | 540 | ||
558 | if (cr4 & CR4_VMXE_MASK) { | 541 | if (cr4 & X86_CR4_VMXE) { |
559 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); | 542 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); |
560 | inject_gp(vcpu); | 543 | inject_gp(vcpu); |
561 | return; | 544 | return; |
562 | } | 545 | } |
563 | kvm_arch_ops->set_cr4(vcpu, cr4); | 546 | kvm_x86_ops->set_cr4(vcpu, cr4); |
564 | spin_lock(&vcpu->kvm->lock); | 547 | vcpu->cr4 = cr4; |
548 | mutex_lock(&vcpu->kvm->lock); | ||
565 | kvm_mmu_reset_context(vcpu); | 549 | kvm_mmu_reset_context(vcpu); |
566 | spin_unlock(&vcpu->kvm->lock); | 550 | mutex_unlock(&vcpu->kvm->lock); |
567 | } | 551 | } |
568 | EXPORT_SYMBOL_GPL(set_cr4); | 552 | EXPORT_SYMBOL_GPL(set_cr4); |
569 | 553 | ||
570 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 554 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
571 | { | 555 | { |
572 | if (is_long_mode(vcpu)) { | 556 | if (is_long_mode(vcpu)) { |
573 | if (cr3 & CR3_L_MODE_RESEVED_BITS) { | 557 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { |
574 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | 558 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); |
575 | inject_gp(vcpu); | 559 | inject_gp(vcpu); |
576 | return; | 560 | return; |
577 | } | 561 | } |
578 | } else { | 562 | } else { |
579 | if (cr3 & CR3_RESEVED_BITS) { | 563 | if (is_pae(vcpu)) { |
580 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | 564 | if (cr3 & CR3_PAE_RESERVED_BITS) { |
581 | inject_gp(vcpu); | 565 | printk(KERN_DEBUG |
582 | return; | 566 | "set_cr3: #GP, reserved bits\n"); |
583 | } | 567 | inject_gp(vcpu); |
584 | if (is_paging(vcpu) && is_pae(vcpu) && | 568 | return; |
585 | !load_pdptrs(vcpu, cr3)) { | 569 | } |
586 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " | 570 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { |
587 | "reserved bits\n"); | 571 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " |
588 | inject_gp(vcpu); | 572 | "reserved bits\n"); |
589 | return; | 573 | inject_gp(vcpu); |
574 | return; | ||
575 | } | ||
576 | } else { | ||
577 | if (cr3 & CR3_NONPAE_RESERVED_BITS) { | ||
578 | printk(KERN_DEBUG | ||
579 | "set_cr3: #GP, reserved bits\n"); | ||
580 | inject_gp(vcpu); | ||
581 | return; | ||
582 | } | ||
590 | } | 583 | } |
591 | } | 584 | } |
592 | 585 | ||
593 | vcpu->cr3 = cr3; | 586 | mutex_lock(&vcpu->kvm->lock); |
594 | spin_lock(&vcpu->kvm->lock); | ||
595 | /* | 587 | /* |
596 | * Does the new cr3 value map to physical memory? (Note, we | 588 | * Does the new cr3 value map to physical memory? (Note, we |
597 | * catch an invalid cr3 even in real-mode, because it would | 589 | * catch an invalid cr3 even in real-mode, because it would |
@@ -603,46 +595,73 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
603 | */ | 595 | */ |
604 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 596 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
605 | inject_gp(vcpu); | 597 | inject_gp(vcpu); |
606 | else | 598 | else { |
599 | vcpu->cr3 = cr3; | ||
607 | vcpu->mmu.new_cr3(vcpu); | 600 | vcpu->mmu.new_cr3(vcpu); |
608 | spin_unlock(&vcpu->kvm->lock); | 601 | } |
602 | mutex_unlock(&vcpu->kvm->lock); | ||
609 | } | 603 | } |
610 | EXPORT_SYMBOL_GPL(set_cr3); | 604 | EXPORT_SYMBOL_GPL(set_cr3); |
611 | 605 | ||
612 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 606 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
613 | { | 607 | { |
614 | if ( cr8 & CR8_RESEVED_BITS) { | 608 | if (cr8 & CR8_RESERVED_BITS) { |
615 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); | 609 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); |
616 | inject_gp(vcpu); | 610 | inject_gp(vcpu); |
617 | return; | 611 | return; |
618 | } | 612 | } |
619 | vcpu->cr8 = cr8; | 613 | if (irqchip_in_kernel(vcpu->kvm)) |
614 | kvm_lapic_set_tpr(vcpu, cr8); | ||
615 | else | ||
616 | vcpu->cr8 = cr8; | ||
620 | } | 617 | } |
621 | EXPORT_SYMBOL_GPL(set_cr8); | 618 | EXPORT_SYMBOL_GPL(set_cr8); |
622 | 619 | ||
623 | void fx_init(struct kvm_vcpu *vcpu) | 620 | unsigned long get_cr8(struct kvm_vcpu *vcpu) |
621 | { | ||
622 | if (irqchip_in_kernel(vcpu->kvm)) | ||
623 | return kvm_lapic_get_cr8(vcpu); | ||
624 | else | ||
625 | return vcpu->cr8; | ||
626 | } | ||
627 | EXPORT_SYMBOL_GPL(get_cr8); | ||
628 | |||
629 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | ||
624 | { | 630 | { |
625 | struct __attribute__ ((__packed__)) fx_image_s { | 631 | if (irqchip_in_kernel(vcpu->kvm)) |
626 | u16 control; //fcw | 632 | return vcpu->apic_base; |
627 | u16 status; //fsw | 633 | else |
628 | u16 tag; // ftw | 634 | return vcpu->apic_base; |
629 | u16 opcode; //fop | 635 | } |
630 | u64 ip; // fpu ip | 636 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); |
631 | u64 operand;// fpu dp | ||
632 | u32 mxcsr; | ||
633 | u32 mxcsr_mask; | ||
634 | 637 | ||
635 | } *fx_image; | 638 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) |
639 | { | ||
640 | /* TODO: reserve bits check */ | ||
641 | if (irqchip_in_kernel(vcpu->kvm)) | ||
642 | kvm_lapic_set_base(vcpu, data); | ||
643 | else | ||
644 | vcpu->apic_base = data; | ||
645 | } | ||
646 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | ||
647 | |||
648 | void fx_init(struct kvm_vcpu *vcpu) | ||
649 | { | ||
650 | unsigned after_mxcsr_mask; | ||
636 | 651 | ||
637 | fx_save(vcpu->host_fx_image); | 652 | /* Initialize guest FPU by resetting ours and saving into guest's */ |
653 | preempt_disable(); | ||
654 | fx_save(&vcpu->host_fx_image); | ||
638 | fpu_init(); | 655 | fpu_init(); |
639 | fx_save(vcpu->guest_fx_image); | 656 | fx_save(&vcpu->guest_fx_image); |
640 | fx_restore(vcpu->host_fx_image); | 657 | fx_restore(&vcpu->host_fx_image); |
658 | preempt_enable(); | ||
641 | 659 | ||
642 | fx_image = (struct fx_image_s *)vcpu->guest_fx_image; | 660 | vcpu->cr0 |= X86_CR0_ET; |
643 | fx_image->mxcsr = 0x1f80; | 661 | after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); |
644 | memset(vcpu->guest_fx_image + sizeof(struct fx_image_s), | 662 | vcpu->guest_fx_image.mxcsr = 0x1f80; |
645 | 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s)); | 663 | memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask, |
664 | 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); | ||
646 | } | 665 | } |
647 | EXPORT_SYMBOL_GPL(fx_init); | 666 | EXPORT_SYMBOL_GPL(fx_init); |
648 | 667 | ||
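
fx_init() above clears everything in the guest fxsave image past the mxcsr_mask field by taking offsetof(struct i387_fxsave_struct, st_space) and memset()ing from that offset to the end. The same offsetof-based partial clear, on a hypothetical struct with a similar header/register-area split, looks like this:

	#include <stdio.h>
	#include <string.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Hypothetical layout loosely modelled on the fxsave image: a small
	 * control header followed by the large register save areas. */
	struct demo_fxsave {
		uint16_t cwd, swd, twd, fop;
		uint64_t rip, rdp;
		uint32_t mxcsr, mxcsr_mask;
		uint32_t st_space[32];   /* x87 registers */
		uint32_t xmm_space[64];  /* SSE registers */
	};

	int main(void)
	{
		struct demo_fxsave image;
		size_t after_mxcsr_mask = offsetof(struct demo_fxsave, st_space);

		memset(&image, 0xAA, sizeof(image));      /* pretend fx_save() filled it   */
		image.mxcsr = 0x1f80;                     /* sane default, as in fx_init() */
		memset((char *)&image + after_mxcsr_mask, /* clear only the register areas */
		       0, sizeof(image) - after_mxcsr_mask);

		printf("header bytes kept: %zu, bytes cleared: %zu\n",
		       after_mxcsr_mask, sizeof(image) - after_mxcsr_mask);
		return 0;
	}
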
@@ -661,7 +680,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | |||
661 | unsigned long i; | 680 | unsigned long i; |
662 | struct kvm_memory_slot *memslot; | 681 | struct kvm_memory_slot *memslot; |
663 | struct kvm_memory_slot old, new; | 682 | struct kvm_memory_slot old, new; |
664 | int memory_config_version; | ||
665 | 683 | ||
666 | r = -EINVAL; | 684 | r = -EINVAL; |
667 | /* General sanity checks */ | 685 | /* General sanity checks */ |
@@ -681,10 +699,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | |||
681 | if (!npages) | 699 | if (!npages) |
682 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; | 700 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; |
683 | 701 | ||
684 | raced: | 702 | mutex_lock(&kvm->lock); |
685 | spin_lock(&kvm->lock); | ||
686 | 703 | ||
687 | memory_config_version = kvm->memory_config_version; | ||
688 | new = old = *memslot; | 704 | new = old = *memslot; |
689 | 705 | ||
690 | new.base_gfn = base_gfn; | 706 | new.base_gfn = base_gfn; |
@@ -707,11 +723,6 @@ raced: | |||
707 | (base_gfn >= s->base_gfn + s->npages))) | 723 | (base_gfn >= s->base_gfn + s->npages))) |
708 | goto out_unlock; | 724 | goto out_unlock; |
709 | } | 725 | } |
710 | /* | ||
711 | * Do memory allocations outside lock. memory_config_version will | ||
712 | * detect any races. | ||
713 | */ | ||
714 | spin_unlock(&kvm->lock); | ||
715 | 726 | ||
716 | /* Deallocate if slot is being removed */ | 727 | /* Deallocate if slot is being removed */ |
717 | if (!npages) | 728 | if (!npages) |
@@ -728,14 +739,14 @@ raced: | |||
728 | new.phys_mem = vmalloc(npages * sizeof(struct page *)); | 739 | new.phys_mem = vmalloc(npages * sizeof(struct page *)); |
729 | 740 | ||
730 | if (!new.phys_mem) | 741 | if (!new.phys_mem) |
731 | goto out_free; | 742 | goto out_unlock; |
732 | 743 | ||
733 | memset(new.phys_mem, 0, npages * sizeof(struct page *)); | 744 | memset(new.phys_mem, 0, npages * sizeof(struct page *)); |
734 | for (i = 0; i < npages; ++i) { | 745 | for (i = 0; i < npages; ++i) { |
735 | new.phys_mem[i] = alloc_page(GFP_HIGHUSER | 746 | new.phys_mem[i] = alloc_page(GFP_HIGHUSER |
736 | | __GFP_ZERO); | 747 | | __GFP_ZERO); |
737 | if (!new.phys_mem[i]) | 748 | if (!new.phys_mem[i]) |
738 | goto out_free; | 749 | goto out_unlock; |
739 | set_page_private(new.phys_mem[i],0); | 750 | set_page_private(new.phys_mem[i],0); |
740 | } | 751 | } |
741 | } | 752 | } |
@@ -746,39 +757,25 @@ raced: | |||
746 | 757 | ||
747 | new.dirty_bitmap = vmalloc(dirty_bytes); | 758 | new.dirty_bitmap = vmalloc(dirty_bytes); |
748 | if (!new.dirty_bitmap) | 759 | if (!new.dirty_bitmap) |
749 | goto out_free; | 760 | goto out_unlock; |
750 | memset(new.dirty_bitmap, 0, dirty_bytes); | 761 | memset(new.dirty_bitmap, 0, dirty_bytes); |
751 | } | 762 | } |
752 | 763 | ||
753 | spin_lock(&kvm->lock); | ||
754 | |||
755 | if (memory_config_version != kvm->memory_config_version) { | ||
756 | spin_unlock(&kvm->lock); | ||
757 | kvm_free_physmem_slot(&new, &old); | ||
758 | goto raced; | ||
759 | } | ||
760 | |||
761 | r = -EAGAIN; | ||
762 | if (kvm->busy) | ||
763 | goto out_unlock; | ||
764 | |||
765 | if (mem->slot >= kvm->nmemslots) | 764 | if (mem->slot >= kvm->nmemslots) |
766 | kvm->nmemslots = mem->slot + 1; | 765 | kvm->nmemslots = mem->slot + 1; |
767 | 766 | ||
768 | *memslot = new; | 767 | *memslot = new; |
769 | ++kvm->memory_config_version; | ||
770 | 768 | ||
771 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 769 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
772 | kvm_flush_remote_tlbs(kvm); | 770 | kvm_flush_remote_tlbs(kvm); |
773 | 771 | ||
774 | spin_unlock(&kvm->lock); | 772 | mutex_unlock(&kvm->lock); |
775 | 773 | ||
776 | kvm_free_physmem_slot(&old, &new); | 774 | kvm_free_physmem_slot(&old, &new); |
777 | return 0; | 775 | return 0; |
778 | 776 | ||
779 | out_unlock: | 777 | out_unlock: |
780 | spin_unlock(&kvm->lock); | 778 | mutex_unlock(&kvm->lock); |
781 | out_free: | ||
782 | kvm_free_physmem_slot(&new, &old); | 779 | kvm_free_physmem_slot(&new, &old); |
783 | out: | 780 | out: |
784 | return r; | 781 | return r; |
@@ -795,14 +792,8 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
795 | int n; | 792 | int n; |
796 | unsigned long any = 0; | 793 | unsigned long any = 0; |
797 | 794 | ||
798 | spin_lock(&kvm->lock); | 795 | mutex_lock(&kvm->lock); |
799 | 796 | ||
800 | /* | ||
801 | * Prevent changes to guest memory configuration even while the lock | ||
802 | * is not taken. | ||
803 | */ | ||
804 | ++kvm->busy; | ||
805 | spin_unlock(&kvm->lock); | ||
806 | r = -EINVAL; | 797 | r = -EINVAL; |
807 | if (log->slot >= KVM_MEMORY_SLOTS) | 798 | if (log->slot >= KVM_MEMORY_SLOTS) |
808 | goto out; | 799 | goto out; |
@@ -821,18 +812,17 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
821 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | 812 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) |
822 | goto out; | 813 | goto out; |
823 | 814 | ||
824 | spin_lock(&kvm->lock); | 815 | /* If nothing is dirty, don't bother messing with page tables. */ |
825 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 816 | if (any) { |
826 | kvm_flush_remote_tlbs(kvm); | 817 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
827 | memset(memslot->dirty_bitmap, 0, n); | 818 | kvm_flush_remote_tlbs(kvm); |
828 | spin_unlock(&kvm->lock); | 819 | memset(memslot->dirty_bitmap, 0, n); |
820 | } | ||
829 | 821 | ||
830 | r = 0; | 822 | r = 0; |
831 | 823 | ||
832 | out: | 824 | out: |
833 | spin_lock(&kvm->lock); | 825 | mutex_unlock(&kvm->lock); |
834 | --kvm->busy; | ||
835 | spin_unlock(&kvm->lock); | ||
836 | return r; | 826 | return r; |
837 | } | 827 | } |
838 | 828 | ||
@@ -862,7 +852,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
862 | < alias->target_phys_addr) | 852 | < alias->target_phys_addr) |
863 | goto out; | 853 | goto out; |
864 | 854 | ||
865 | spin_lock(&kvm->lock); | 855 | mutex_lock(&kvm->lock); |
866 | 856 | ||
867 | p = &kvm->aliases[alias->slot]; | 857 | p = &kvm->aliases[alias->slot]; |
868 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 858 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -876,7 +866,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
876 | 866 | ||
877 | kvm_mmu_zap_all(kvm); | 867 | kvm_mmu_zap_all(kvm); |
878 | 868 | ||
879 | spin_unlock(&kvm->lock); | 869 | mutex_unlock(&kvm->lock); |
880 | 870 | ||
881 | return 0; | 871 | return 0; |
882 | 872 | ||
@@ -884,6 +874,63 @@ out: | |||
884 | return r; | 874 | return r; |
885 | } | 875 | } |
886 | 876 | ||
877 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | ||
878 | { | ||
879 | int r; | ||
880 | |||
881 | r = 0; | ||
882 | switch (chip->chip_id) { | ||
883 | case KVM_IRQCHIP_PIC_MASTER: | ||
884 | memcpy (&chip->chip.pic, | ||
885 | &pic_irqchip(kvm)->pics[0], | ||
886 | sizeof(struct kvm_pic_state)); | ||
887 | break; | ||
888 | case KVM_IRQCHIP_PIC_SLAVE: | ||
889 | memcpy (&chip->chip.pic, | ||
890 | &pic_irqchip(kvm)->pics[1], | ||
891 | sizeof(struct kvm_pic_state)); | ||
892 | break; | ||
893 | case KVM_IRQCHIP_IOAPIC: | ||
894 | memcpy (&chip->chip.ioapic, | ||
895 | ioapic_irqchip(kvm), | ||
896 | sizeof(struct kvm_ioapic_state)); | ||
897 | break; | ||
898 | default: | ||
899 | r = -EINVAL; | ||
900 | break; | ||
901 | } | ||
902 | return r; | ||
903 | } | ||
904 | |||
905 | static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | ||
906 | { | ||
907 | int r; | ||
908 | |||
909 | r = 0; | ||
910 | switch (chip->chip_id) { | ||
911 | case KVM_IRQCHIP_PIC_MASTER: | ||
912 | memcpy (&pic_irqchip(kvm)->pics[0], | ||
913 | &chip->chip.pic, | ||
914 | sizeof(struct kvm_pic_state)); | ||
915 | break; | ||
916 | case KVM_IRQCHIP_PIC_SLAVE: | ||
917 | memcpy (&pic_irqchip(kvm)->pics[1], | ||
918 | &chip->chip.pic, | ||
919 | sizeof(struct kvm_pic_state)); | ||
920 | break; | ||
921 | case KVM_IRQCHIP_IOAPIC: | ||
922 | memcpy (ioapic_irqchip(kvm), | ||
923 | &chip->chip.ioapic, | ||
924 | sizeof(struct kvm_ioapic_state)); | ||
925 | break; | ||
926 | default: | ||
927 | r = -EINVAL; | ||
928 | break; | ||
929 | } | ||
930 | kvm_pic_update_irq(pic_irqchip(kvm)); | ||
931 | return r; | ||
932 | } | ||
933 | |||
887 | static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 934 | static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
888 | { | 935 | { |
889 | int i; | 936 | int i; |
@@ -930,37 +977,26 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
930 | } | 977 | } |
931 | EXPORT_SYMBOL_GPL(gfn_to_page); | 978 | EXPORT_SYMBOL_GPL(gfn_to_page); |
932 | 979 | ||
980 | /* WARNING: Does not work on aliased pages. */ | ||
933 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | 981 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) |
934 | { | 982 | { |
935 | int i; | ||
936 | struct kvm_memory_slot *memslot; | 983 | struct kvm_memory_slot *memslot; |
937 | unsigned long rel_gfn; | ||
938 | 984 | ||
939 | for (i = 0; i < kvm->nmemslots; ++i) { | 985 | memslot = __gfn_to_memslot(kvm, gfn); |
940 | memslot = &kvm->memslots[i]; | 986 | if (memslot && memslot->dirty_bitmap) { |
941 | 987 | unsigned long rel_gfn = gfn - memslot->base_gfn; | |
942 | if (gfn >= memslot->base_gfn | ||
943 | && gfn < memslot->base_gfn + memslot->npages) { | ||
944 | 988 | ||
945 | if (!memslot->dirty_bitmap) | 989 | /* avoid RMW */ |
946 | return; | 990 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) |
947 | 991 | set_bit(rel_gfn, memslot->dirty_bitmap); | |
948 | rel_gfn = gfn - memslot->base_gfn; | ||
949 | |||
950 | /* avoid RMW */ | ||
951 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) | ||
952 | set_bit(rel_gfn, memslot->dirty_bitmap); | ||
953 | return; | ||
954 | } | ||
955 | } | 992 | } |
956 | } | 993 | } |
957 | 994 | ||
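
The rewritten mark_page_dirty() above looks the memslot up once, converts the gfn to a slot-relative index, and only issues set_bit() when the bit is still clear, so marking an already-dirty page costs a read instead of an atomic read-modify-write. A simplified single-slot userspace model (plain bit operations stand in for the kernel's test_bit/set_bit helpers; the slot geometry is made up):

	#include <stdio.h>

	#define SLOT_BASE_GFN 0x100UL
	#define SLOT_NPAGES   64UL
	#define BITS_PER_LONG (8 * sizeof(unsigned long))

	static unsigned long dirty_bitmap[(SLOT_NPAGES + BITS_PER_LONG - 1) / BITS_PER_LONG];

	static void demo_mark_page_dirty(unsigned long gfn)
	{
		unsigned long rel_gfn, mask;

		if (gfn < SLOT_BASE_GFN || gfn >= SLOT_BASE_GFN + SLOT_NPAGES)
			return;                        /* gfn not in this (single) slot */

		rel_gfn = gfn - SLOT_BASE_GFN;         /* bit index within the slot */
		mask = 1UL << (rel_gfn % BITS_PER_LONG);

		/* avoid RMW: skip the write if the page is already marked dirty */
		if (!(dirty_bitmap[rel_gfn / BITS_PER_LONG] & mask))
			dirty_bitmap[rel_gfn / BITS_PER_LONG] |= mask;
	}

	int main(void)
	{
		demo_mark_page_dirty(0x105);
		demo_mark_page_dirty(0x105);           /* second call is a no-op */
		printf("bitmap word 0 = %#lx\n", dirty_bitmap[0]);
		return 0;
	}
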
958 | static int emulator_read_std(unsigned long addr, | 995 | int emulator_read_std(unsigned long addr, |
959 | void *val, | 996 | void *val, |
960 | unsigned int bytes, | 997 | unsigned int bytes, |
961 | struct x86_emulate_ctxt *ctxt) | 998 | struct kvm_vcpu *vcpu) |
962 | { | 999 | { |
963 | struct kvm_vcpu *vcpu = ctxt->vcpu; | ||
964 | void *data = val; | 1000 | void *data = val; |
965 | 1001 | ||
966 | while (bytes) { | 1002 | while (bytes) { |
@@ -990,26 +1026,42 @@ static int emulator_read_std(unsigned long addr, | |||
990 | 1026 | ||
991 | return X86EMUL_CONTINUE; | 1027 | return X86EMUL_CONTINUE; |
992 | } | 1028 | } |
1029 | EXPORT_SYMBOL_GPL(emulator_read_std); | ||
993 | 1030 | ||
994 | static int emulator_write_std(unsigned long addr, | 1031 | static int emulator_write_std(unsigned long addr, |
995 | const void *val, | 1032 | const void *val, |
996 | unsigned int bytes, | 1033 | unsigned int bytes, |
997 | struct x86_emulate_ctxt *ctxt) | 1034 | struct kvm_vcpu *vcpu) |
998 | { | 1035 | { |
999 | printk(KERN_ERR "emulator_write_std: addr %lx n %d\n", | 1036 | pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes); |
1000 | addr, bytes); | ||
1001 | return X86EMUL_UNHANDLEABLE; | 1037 | return X86EMUL_UNHANDLEABLE; |
1002 | } | 1038 | } |
1003 | 1039 | ||
1040 | /* | ||
1041 | * Only the apic needs an MMIO device hook, so shortcut now.. | ||
1042 | */ | ||
1043 | static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, | ||
1044 | gpa_t addr) | ||
1045 | { | ||
1046 | struct kvm_io_device *dev; | ||
1047 | |||
1048 | if (vcpu->apic) { | ||
1049 | dev = &vcpu->apic->dev; | ||
1050 | if (dev->in_range(dev, addr)) | ||
1051 | return dev; | ||
1052 | } | ||
1053 | return NULL; | ||
1054 | } | ||
1055 | |||
1004 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, | 1056 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, |
1005 | gpa_t addr) | 1057 | gpa_t addr) |
1006 | { | 1058 | { |
1007 | /* | 1059 | struct kvm_io_device *dev; |
1008 | * Note that its important to have this wrapper function because | 1060 | |
1009 | * in the very near future we will be checking for MMIOs against | 1061 | dev = vcpu_find_pervcpu_dev(vcpu, addr); |
1010 | * the LAPIC as well as the general MMIO bus | 1062 | if (dev == NULL) |
1011 | */ | 1063 | dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); |
1012 | return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); | 1064 | return dev; |
1013 | } | 1065 | } |
1014 | 1066 | ||
1015 | static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, | 1067 | static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, |
@@ -1021,9 +1073,8 @@ static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, | |||
1021 | static int emulator_read_emulated(unsigned long addr, | 1073 | static int emulator_read_emulated(unsigned long addr, |
1022 | void *val, | 1074 | void *val, |
1023 | unsigned int bytes, | 1075 | unsigned int bytes, |
1024 | struct x86_emulate_ctxt *ctxt) | 1076 | struct kvm_vcpu *vcpu) |
1025 | { | 1077 | { |
1026 | struct kvm_vcpu *vcpu = ctxt->vcpu; | ||
1027 | struct kvm_io_device *mmio_dev; | 1078 | struct kvm_io_device *mmio_dev; |
1028 | gpa_t gpa; | 1079 | gpa_t gpa; |
1029 | 1080 | ||
@@ -1031,7 +1082,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
1031 | memcpy(val, vcpu->mmio_data, bytes); | 1082 | memcpy(val, vcpu->mmio_data, bytes); |
1032 | vcpu->mmio_read_completed = 0; | 1083 | vcpu->mmio_read_completed = 0; |
1033 | return X86EMUL_CONTINUE; | 1084 | return X86EMUL_CONTINUE; |
1034 | } else if (emulator_read_std(addr, val, bytes, ctxt) | 1085 | } else if (emulator_read_std(addr, val, bytes, vcpu) |
1035 | == X86EMUL_CONTINUE) | 1086 | == X86EMUL_CONTINUE) |
1036 | return X86EMUL_CONTINUE; | 1087 | return X86EMUL_CONTINUE; |
1037 | 1088 | ||
@@ -1061,7 +1112,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1061 | { | 1112 | { |
1062 | struct page *page; | 1113 | struct page *page; |
1063 | void *virt; | 1114 | void *virt; |
1064 | unsigned offset = offset_in_page(gpa); | ||
1065 | 1115 | ||
1066 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) | 1116 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) |
1067 | return 0; | 1117 | return 0; |
@@ -1070,7 +1120,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1070 | return 0; | 1120 | return 0; |
1071 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | 1121 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); |
1072 | virt = kmap_atomic(page, KM_USER0); | 1122 | virt = kmap_atomic(page, KM_USER0); |
1073 | kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); | 1123 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
1074 | memcpy(virt + offset_in_page(gpa), val, bytes); | 1124 | memcpy(virt + offset_in_page(gpa), val, bytes); |
1075 | kunmap_atomic(virt, KM_USER0); | 1125 | kunmap_atomic(virt, KM_USER0); |
1076 | return 1; | 1126 | return 1; |
@@ -1079,14 +1129,13 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1079 | static int emulator_write_emulated_onepage(unsigned long addr, | 1129 | static int emulator_write_emulated_onepage(unsigned long addr, |
1080 | const void *val, | 1130 | const void *val, |
1081 | unsigned int bytes, | 1131 | unsigned int bytes, |
1082 | struct x86_emulate_ctxt *ctxt) | 1132 | struct kvm_vcpu *vcpu) |
1083 | { | 1133 | { |
1084 | struct kvm_vcpu *vcpu = ctxt->vcpu; | ||
1085 | struct kvm_io_device *mmio_dev; | 1134 | struct kvm_io_device *mmio_dev; |
1086 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | 1135 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); |
1087 | 1136 | ||
1088 | if (gpa == UNMAPPED_GVA) { | 1137 | if (gpa == UNMAPPED_GVA) { |
1089 | kvm_arch_ops->inject_page_fault(vcpu, addr, 2); | 1138 | kvm_x86_ops->inject_page_fault(vcpu, addr, 2); |
1090 | return X86EMUL_PROPAGATE_FAULT; | 1139 | return X86EMUL_PROPAGATE_FAULT; |
1091 | } | 1140 | } |
1092 | 1141 | ||
@@ -1111,31 +1160,32 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
1111 | return X86EMUL_CONTINUE; | 1160 | return X86EMUL_CONTINUE; |
1112 | } | 1161 | } |
1113 | 1162 | ||
1114 | static int emulator_write_emulated(unsigned long addr, | 1163 | int emulator_write_emulated(unsigned long addr, |
1115 | const void *val, | 1164 | const void *val, |
1116 | unsigned int bytes, | 1165 | unsigned int bytes, |
1117 | struct x86_emulate_ctxt *ctxt) | 1166 | struct kvm_vcpu *vcpu) |
1118 | { | 1167 | { |
1119 | /* Crossing a page boundary? */ | 1168 | /* Crossing a page boundary? */ |
1120 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 1169 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
1121 | int rc, now; | 1170 | int rc, now; |
1122 | 1171 | ||
1123 | now = -addr & ~PAGE_MASK; | 1172 | now = -addr & ~PAGE_MASK; |
1124 | rc = emulator_write_emulated_onepage(addr, val, now, ctxt); | 1173 | rc = emulator_write_emulated_onepage(addr, val, now, vcpu); |
1125 | if (rc != X86EMUL_CONTINUE) | 1174 | if (rc != X86EMUL_CONTINUE) |
1126 | return rc; | 1175 | return rc; |
1127 | addr += now; | 1176 | addr += now; |
1128 | val += now; | 1177 | val += now; |
1129 | bytes -= now; | 1178 | bytes -= now; |
1130 | } | 1179 | } |
1131 | return emulator_write_emulated_onepage(addr, val, bytes, ctxt); | 1180 | return emulator_write_emulated_onepage(addr, val, bytes, vcpu); |
1132 | } | 1181 | } |
1182 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | ||
1133 | 1183 | ||
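
emulator_write_emulated() above splits a write that straddles a page boundary: `now = -addr & ~PAGE_MASK` is the number of bytes from addr up to the next page boundary, so the first onepage call ends exactly at the boundary and the remainder goes in a second call. A standalone check of that arithmetic (the address and length are chosen only for illustration):

	#include <stdio.h>

	#define DEMO_PAGE_SIZE 4096UL
	#define DEMO_PAGE_MASK (~(DEMO_PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long addr = 0x7ffff7ff8ff0UL;  /* 16 bytes before a page boundary */
		unsigned long bytes = 40;

		/* Crossing a page boundary? Same test as emulator_write_emulated(). */
		if (((addr + bytes - 1) ^ addr) & DEMO_PAGE_MASK) {
			unsigned long now = -addr & ~DEMO_PAGE_MASK; /* bytes up to the boundary */

			printf("first chunk:  %lu bytes at %#lx\n", now, addr);
			printf("second chunk: %lu bytes at %#lx\n", bytes - now, addr + now);
		}
		return 0;
	}
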
1134 | static int emulator_cmpxchg_emulated(unsigned long addr, | 1184 | static int emulator_cmpxchg_emulated(unsigned long addr, |
1135 | const void *old, | 1185 | const void *old, |
1136 | const void *new, | 1186 | const void *new, |
1137 | unsigned int bytes, | 1187 | unsigned int bytes, |
1138 | struct x86_emulate_ctxt *ctxt) | 1188 | struct kvm_vcpu *vcpu) |
1139 | { | 1189 | { |
1140 | static int reported; | 1190 | static int reported; |
1141 | 1191 | ||
@@ -1143,12 +1193,12 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1143 | reported = 1; | 1193 | reported = 1; |
1144 | printk(KERN_WARNING "kvm: emulating exchange as write\n"); | 1194 | printk(KERN_WARNING "kvm: emulating exchange as write\n"); |
1145 | } | 1195 | } |
1146 | return emulator_write_emulated(addr, new, bytes, ctxt); | 1196 | return emulator_write_emulated(addr, new, bytes, vcpu); |
1147 | } | 1197 | } |
1148 | 1198 | ||
1149 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 1199 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
1150 | { | 1200 | { |
1151 | return kvm_arch_ops->get_segment_base(vcpu, seg); | 1201 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
1152 | } | 1202 | } |
1153 | 1203 | ||
1154 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 1204 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) |
@@ -1158,10 +1208,8 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
1158 | 1208 | ||
1159 | int emulate_clts(struct kvm_vcpu *vcpu) | 1209 | int emulate_clts(struct kvm_vcpu *vcpu) |
1160 | { | 1210 | { |
1161 | unsigned long cr0; | 1211 | vcpu->cr0 &= ~X86_CR0_TS; |
1162 | 1212 | kvm_x86_ops->set_cr0(vcpu, vcpu->cr0); | |
1163 | cr0 = vcpu->cr0 & ~CR0_TS_MASK; | ||
1164 | kvm_arch_ops->set_cr0(vcpu, cr0); | ||
1165 | return X86EMUL_CONTINUE; | 1213 | return X86EMUL_CONTINUE; |
1166 | } | 1214 | } |
1167 | 1215 | ||
@@ -1171,11 +1219,10 @@ int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest) | |||
1171 | 1219 | ||
1172 | switch (dr) { | 1220 | switch (dr) { |
1173 | case 0 ... 3: | 1221 | case 0 ... 3: |
1174 | *dest = kvm_arch_ops->get_dr(vcpu, dr); | 1222 | *dest = kvm_x86_ops->get_dr(vcpu, dr); |
1175 | return X86EMUL_CONTINUE; | 1223 | return X86EMUL_CONTINUE; |
1176 | default: | 1224 | default: |
1177 | printk(KERN_DEBUG "%s: unexpected dr %u\n", | 1225 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); |
1178 | __FUNCTION__, dr); | ||
1179 | return X86EMUL_UNHANDLEABLE; | 1226 | return X86EMUL_UNHANDLEABLE; |
1180 | } | 1227 | } |
1181 | } | 1228 | } |
@@ -1185,7 +1232,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
1185 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 1232 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
1186 | int exception; | 1233 | int exception; |
1187 | 1234 | ||
1188 | kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); | 1235 | kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); |
1189 | if (exception) { | 1236 | if (exception) { |
1190 | /* FIXME: better handling */ | 1237 | /* FIXME: better handling */ |
1191 | return X86EMUL_UNHANDLEABLE; | 1238 | return X86EMUL_UNHANDLEABLE; |
@@ -1193,25 +1240,25 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
1193 | return X86EMUL_CONTINUE; | 1240 | return X86EMUL_CONTINUE; |
1194 | } | 1241 | } |
1195 | 1242 | ||
1196 | static void report_emulation_failure(struct x86_emulate_ctxt *ctxt) | 1243 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
1197 | { | 1244 | { |
1198 | static int reported; | 1245 | static int reported; |
1199 | u8 opcodes[4]; | 1246 | u8 opcodes[4]; |
1200 | unsigned long rip = ctxt->vcpu->rip; | 1247 | unsigned long rip = vcpu->rip; |
1201 | unsigned long rip_linear; | 1248 | unsigned long rip_linear; |
1202 | 1249 | ||
1203 | rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS); | 1250 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); |
1204 | 1251 | ||
1205 | if (reported) | 1252 | if (reported) |
1206 | return; | 1253 | return; |
1207 | 1254 | ||
1208 | emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt); | 1255 | emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); |
1209 | 1256 | ||
1210 | printk(KERN_ERR "emulation failed but !mmio_needed?" | 1257 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
1211 | " rip %lx %02x %02x %02x %02x\n", | 1258 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
1212 | rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | ||
1213 | reported = 1; | 1259 | reported = 1; |
1214 | } | 1260 | } |
1261 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | ||
1215 | 1262 | ||
1216 | struct x86_emulate_ops emulate_ops = { | 1263 | struct x86_emulate_ops emulate_ops = { |
1217 | .read_std = emulator_read_std, | 1264 | .read_std = emulator_read_std, |
@@ -1231,12 +1278,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1231 | int cs_db, cs_l; | 1278 | int cs_db, cs_l; |
1232 | 1279 | ||
1233 | vcpu->mmio_fault_cr2 = cr2; | 1280 | vcpu->mmio_fault_cr2 = cr2; |
1234 | kvm_arch_ops->cache_regs(vcpu); | 1281 | kvm_x86_ops->cache_regs(vcpu); |
1235 | 1282 | ||
1236 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 1283 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
1237 | 1284 | ||
1238 | emulate_ctxt.vcpu = vcpu; | 1285 | emulate_ctxt.vcpu = vcpu; |
1239 | emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu); | 1286 | emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
1240 | emulate_ctxt.cr2 = cr2; | 1287 | emulate_ctxt.cr2 = cr2; |
1241 | emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) | 1288 | emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) |
1242 | ? X86EMUL_MODE_REAL : cs_l | 1289 | ? X86EMUL_MODE_REAL : cs_l |
@@ -1259,9 +1306,13 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1259 | emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); | 1306 | emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); |
1260 | 1307 | ||
1261 | vcpu->mmio_is_write = 0; | 1308 | vcpu->mmio_is_write = 0; |
1309 | vcpu->pio.string = 0; | ||
1262 | r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); | 1310 | r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); |
1311 | if (vcpu->pio.string) | ||
1312 | return EMULATE_DO_MMIO; | ||
1263 | 1313 | ||
1264 | if ((r || vcpu->mmio_is_write) && run) { | 1314 | if ((r || vcpu->mmio_is_write) && run) { |
1315 | run->exit_reason = KVM_EXIT_MMIO; | ||
1265 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 1316 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
1266 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 1317 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
1267 | run->mmio.len = vcpu->mmio_size; | 1318 | run->mmio.len = vcpu->mmio_size; |
@@ -1272,14 +1323,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1272 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 1323 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
1273 | return EMULATE_DONE; | 1324 | return EMULATE_DONE; |
1274 | if (!vcpu->mmio_needed) { | 1325 | if (!vcpu->mmio_needed) { |
1275 | report_emulation_failure(&emulate_ctxt); | 1326 | kvm_report_emulation_failure(vcpu, "mmio"); |
1276 | return EMULATE_FAIL; | 1327 | return EMULATE_FAIL; |
1277 | } | 1328 | } |
1278 | return EMULATE_DO_MMIO; | 1329 | return EMULATE_DO_MMIO; |
1279 | } | 1330 | } |
1280 | 1331 | ||
1281 | kvm_arch_ops->decache_regs(vcpu); | 1332 | kvm_x86_ops->decache_regs(vcpu); |
1282 | kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); | 1333 | kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags); |
1283 | 1334 | ||
1284 | if (vcpu->mmio_is_write) { | 1335 | if (vcpu->mmio_is_write) { |
1285 | vcpu->mmio_needed = 0; | 1336 | vcpu->mmio_needed = 0; |
@@ -1290,14 +1341,45 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1290 | } | 1341 | } |
1291 | EXPORT_SYMBOL_GPL(emulate_instruction); | 1342 | EXPORT_SYMBOL_GPL(emulate_instruction); |
1292 | 1343 | ||
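
Editorial note on how the emulator's result is consumed: emulate_instruction() now flags in-flight string PIO via vcpu->pio.string and returns EMULATE_DO_MMIO for it, so the exit is completed in userspace just like an MMIO access. A hedged sketch of a caller in a vendor exit handler follows; the function name and the exact emulate_instruction() argument list are illustrative, not taken from this patch.

static int handle_emulation_result(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run,
				   unsigned long cr2, u16 error_code)
{
	switch (emulate_instruction(vcpu, kvm_run, cr2, error_code)) {
	case EMULATE_DONE:
		return 1;	/* handled entirely in the kernel, resume the guest */
	case EMULATE_DO_MMIO:
		return 0;	/* kvm_run already describes the MMIO/PIO, exit to userspace */
	case EMULATE_FAIL:
	default:
		kvm_report_emulation_failure(vcpu, "exit handler");
		return 0;
	}
}
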
1293 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | 1344 | /* |
1345 | * The vcpu has executed a HLT instruction with the in-kernel irqchip enabled. | ||
1346 | */ | ||
1347 | static void kvm_vcpu_block(struct kvm_vcpu *vcpu) | ||
1294 | { | 1348 | { |
1295 | if (vcpu->irq_summary) | 1349 | DECLARE_WAITQUEUE(wait, current); |
1296 | return 1; | ||
1297 | 1350 | ||
1298 | vcpu->run->exit_reason = KVM_EXIT_HLT; | 1351 | add_wait_queue(&vcpu->wq, &wait); |
1352 | |||
1353 | /* | ||
1354 | * We will block until either an interrupt or a signal wakes us up | ||
1355 | */ | ||
1356 | while (!kvm_cpu_has_interrupt(vcpu) | ||
1357 | && !signal_pending(current) | ||
1358 | && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE | ||
1359 | && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) { | ||
1360 | set_current_state(TASK_INTERRUPTIBLE); | ||
1361 | vcpu_put(vcpu); | ||
1362 | schedule(); | ||
1363 | vcpu_load(vcpu); | ||
1364 | } | ||
1365 | |||
1366 | __set_current_state(TASK_RUNNING); | ||
1367 | remove_wait_queue(&vcpu->wq, &wait); | ||
1368 | } | ||
1369 | |||
1370 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | ||
1371 | { | ||
1299 | ++vcpu->stat.halt_exits; | 1372 | ++vcpu->stat.halt_exits; |
1300 | return 0; | 1373 | if (irqchip_in_kernel(vcpu->kvm)) { |
1374 | vcpu->mp_state = VCPU_MP_STATE_HALTED; | ||
1375 | kvm_vcpu_block(vcpu); | ||
1376 | if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE) | ||
1377 | return -EINTR; | ||
1378 | return 1; | ||
1379 | } else { | ||
1380 | vcpu->run->exit_reason = KVM_EXIT_HLT; | ||
1381 | return 0; | ||
1382 | } | ||
1301 | } | 1383 | } |
1302 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 1384 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
1303 | 1385 | ||
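
kvm_vcpu_block() above only implements the sleeping side of in-kernel HLT; the waker lives with the interrupt sources (the in-kernel PIC/IOAPIC/LAPIC introduced by this series, not shown in this hunk). A minimal sketch of what such a waker has to do -- the function name here is made up for illustration, not part of this patch:

static void wake_halted_vcpu(struct kvm_vcpu *vcpu)
{
	if (waitqueue_active(&vcpu->wq)) {
		vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
		wake_up_interruptible(&vcpu->wq);
	}
}
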
@@ -1305,7 +1387,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1305 | { | 1387 | { |
1306 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; | 1388 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; |
1307 | 1389 | ||
1308 | kvm_arch_ops->cache_regs(vcpu); | 1390 | kvm_x86_ops->cache_regs(vcpu); |
1309 | ret = -KVM_EINVAL; | 1391 | ret = -KVM_EINVAL; |
1310 | #ifdef CONFIG_X86_64 | 1392 | #ifdef CONFIG_X86_64 |
1311 | if (is_long_mode(vcpu)) { | 1393 | if (is_long_mode(vcpu)) { |
@@ -1329,6 +1411,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1329 | } | 1411 | } |
1330 | switch (nr) { | 1412 | switch (nr) { |
1331 | default: | 1413 | default: |
1414 | run->hypercall.nr = nr; | ||
1332 | run->hypercall.args[0] = a0; | 1415 | run->hypercall.args[0] = a0; |
1333 | run->hypercall.args[1] = a1; | 1416 | run->hypercall.args[1] = a1; |
1334 | run->hypercall.args[2] = a2; | 1417 | run->hypercall.args[2] = a2; |
@@ -1337,11 +1420,11 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1337 | run->hypercall.args[5] = a5; | 1420 | run->hypercall.args[5] = a5; |
1338 | run->hypercall.ret = ret; | 1421 | run->hypercall.ret = ret; |
1339 | run->hypercall.longmode = is_long_mode(vcpu); | 1422 | run->hypercall.longmode = is_long_mode(vcpu); |
1340 | kvm_arch_ops->decache_regs(vcpu); | 1423 | kvm_x86_ops->decache_regs(vcpu); |
1341 | return 0; | 1424 | return 0; |
1342 | } | 1425 | } |
1343 | vcpu->regs[VCPU_REGS_RAX] = ret; | 1426 | vcpu->regs[VCPU_REGS_RAX] = ret; |
1344 | kvm_arch_ops->decache_regs(vcpu); | 1427 | kvm_x86_ops->decache_regs(vcpu); |
1345 | return 1; | 1428 | return 1; |
1346 | } | 1429 | } |
1347 | EXPORT_SYMBOL_GPL(kvm_hypercall); | 1430 | EXPORT_SYMBOL_GPL(kvm_hypercall); |
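
For unhandled hypercall numbers the exit now carries run->hypercall.nr alongside the arguments, so userspace can dispatch on it. A hedged userspace sketch (vcpu_fd and handle_guest_hypercall() are hypothetical names); as kvm_vcpu_ioctl_run() later in this patch shows, hypercall.ret is copied back into guest RAX on the next KVM_RUN:

if (run->exit_reason == KVM_EXIT_HYPERCALL) {
	run->hypercall.ret = handle_guest_hypercall(run->hypercall.nr,
						    run->hypercall.args);
	ioctl(vcpu_fd, KVM_RUN, 0);
}
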
@@ -1355,26 +1438,26 @@ void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | |||
1355 | { | 1438 | { |
1356 | struct descriptor_table dt = { limit, base }; | 1439 | struct descriptor_table dt = { limit, base }; |
1357 | 1440 | ||
1358 | kvm_arch_ops->set_gdt(vcpu, &dt); | 1441 | kvm_x86_ops->set_gdt(vcpu, &dt); |
1359 | } | 1442 | } |
1360 | 1443 | ||
1361 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 1444 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
1362 | { | 1445 | { |
1363 | struct descriptor_table dt = { limit, base }; | 1446 | struct descriptor_table dt = { limit, base }; |
1364 | 1447 | ||
1365 | kvm_arch_ops->set_idt(vcpu, &dt); | 1448 | kvm_x86_ops->set_idt(vcpu, &dt); |
1366 | } | 1449 | } |
1367 | 1450 | ||
1368 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | 1451 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, |
1369 | unsigned long *rflags) | 1452 | unsigned long *rflags) |
1370 | { | 1453 | { |
1371 | lmsw(vcpu, msw); | 1454 | lmsw(vcpu, msw); |
1372 | *rflags = kvm_arch_ops->get_rflags(vcpu); | 1455 | *rflags = kvm_x86_ops->get_rflags(vcpu); |
1373 | } | 1456 | } |
1374 | 1457 | ||
1375 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 1458 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
1376 | { | 1459 | { |
1377 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); | 1460 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
1378 | switch (cr) { | 1461 | switch (cr) { |
1379 | case 0: | 1462 | case 0: |
1380 | return vcpu->cr0; | 1463 | return vcpu->cr0; |
@@ -1396,7 +1479,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
1396 | switch (cr) { | 1479 | switch (cr) { |
1397 | case 0: | 1480 | case 0: |
1398 | set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); | 1481 | set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); |
1399 | *rflags = kvm_arch_ops->get_rflags(vcpu); | 1482 | *rflags = kvm_x86_ops->get_rflags(vcpu); |
1400 | break; | 1483 | break; |
1401 | case 2: | 1484 | case 2: |
1402 | vcpu->cr2 = val; | 1485 | vcpu->cr2 = val; |
@@ -1439,7 +1522,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) | |||
1439 | 1522 | ||
1440 | mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); | 1523 | mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); |
1441 | para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); | 1524 | para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); |
1442 | para_state = kmap_atomic(para_state_page, KM_USER0); | 1525 | para_state = kmap(para_state_page); |
1443 | 1526 | ||
1444 | printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); | 1527 | printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); |
1445 | printk(KERN_DEBUG ".... size: %d\n", para_state->size); | 1528 | printk(KERN_DEBUG ".... size: %d\n", para_state->size); |
@@ -1470,12 +1553,12 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) | |||
1470 | mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); | 1553 | mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); |
1471 | hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), | 1554 | hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), |
1472 | KM_USER1) + (hypercall_hpa & ~PAGE_MASK); | 1555 | KM_USER1) + (hypercall_hpa & ~PAGE_MASK); |
1473 | kvm_arch_ops->patch_hypercall(vcpu, hypercall); | 1556 | kvm_x86_ops->patch_hypercall(vcpu, hypercall); |
1474 | kunmap_atomic(hypercall, KM_USER1); | 1557 | kunmap_atomic(hypercall, KM_USER1); |
1475 | 1558 | ||
1476 | para_state->ret = 0; | 1559 | para_state->ret = 0; |
1477 | err_kunmap_skip: | 1560 | err_kunmap_skip: |
1478 | kunmap_atomic(para_state, KM_USER0); | 1561 | kunmap(para_state_page); |
1479 | return 0; | 1562 | return 0; |
1480 | err_gp: | 1563 | err_gp: |
1481 | return 1; | 1564 | return 1; |
@@ -1511,7 +1594,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1511 | data = 3; | 1594 | data = 3; |
1512 | break; | 1595 | break; |
1513 | case MSR_IA32_APICBASE: | 1596 | case MSR_IA32_APICBASE: |
1514 | data = vcpu->apic_base; | 1597 | data = kvm_get_apic_base(vcpu); |
1515 | break; | 1598 | break; |
1516 | case MSR_IA32_MISC_ENABLE: | 1599 | case MSR_IA32_MISC_ENABLE: |
1517 | data = vcpu->ia32_misc_enable_msr; | 1600 | data = vcpu->ia32_misc_enable_msr; |
@@ -1522,7 +1605,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1522 | break; | 1605 | break; |
1523 | #endif | 1606 | #endif |
1524 | default: | 1607 | default: |
1525 | printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr); | 1608 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
1526 | return 1; | 1609 | return 1; |
1527 | } | 1610 | } |
1528 | *pdata = data; | 1611 | *pdata = data; |
@@ -1537,7 +1620,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common); | |||
1537 | */ | 1620 | */ |
1538 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 1621 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
1539 | { | 1622 | { |
1540 | return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); | 1623 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); |
1541 | } | 1624 | } |
1542 | 1625 | ||
1543 | #ifdef CONFIG_X86_64 | 1626 | #ifdef CONFIG_X86_64 |
@@ -1558,7 +1641,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
1558 | return; | 1641 | return; |
1559 | } | 1642 | } |
1560 | 1643 | ||
1561 | kvm_arch_ops->set_efer(vcpu, efer); | 1644 | kvm_x86_ops->set_efer(vcpu, efer); |
1562 | 1645 | ||
1563 | efer &= ~EFER_LMA; | 1646 | efer &= ~EFER_LMA; |
1564 | efer |= vcpu->shadow_efer & EFER_LMA; | 1647 | efer |= vcpu->shadow_efer & EFER_LMA; |
@@ -1577,11 +1660,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1577 | break; | 1660 | break; |
1578 | #endif | 1661 | #endif |
1579 | case MSR_IA32_MC0_STATUS: | 1662 | case MSR_IA32_MC0_STATUS: |
1580 | printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", | 1663 | pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", |
1581 | __FUNCTION__, data); | 1664 | __FUNCTION__, data); |
1582 | break; | 1665 | break; |
1583 | case MSR_IA32_MCG_STATUS: | 1666 | case MSR_IA32_MCG_STATUS: |
1584 | printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | 1667 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", |
1585 | __FUNCTION__, data); | 1668 | __FUNCTION__, data); |
1586 | break; | 1669 | break; |
1587 | case MSR_IA32_UCODE_REV: | 1670 | case MSR_IA32_UCODE_REV: |
@@ -1589,7 +1672,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1589 | case 0x200 ... 0x2ff: /* MTRRs */ | 1672 | case 0x200 ... 0x2ff: /* MTRRs */ |
1590 | break; | 1673 | break; |
1591 | case MSR_IA32_APICBASE: | 1674 | case MSR_IA32_APICBASE: |
1592 | vcpu->apic_base = data; | 1675 | kvm_set_apic_base(vcpu, data); |
1593 | break; | 1676 | break; |
1594 | case MSR_IA32_MISC_ENABLE: | 1677 | case MSR_IA32_MISC_ENABLE: |
1595 | vcpu->ia32_misc_enable_msr = data; | 1678 | vcpu->ia32_misc_enable_msr = data; |
@@ -1601,7 +1684,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1601 | return vcpu_register_para(vcpu, data); | 1684 | return vcpu_register_para(vcpu, data); |
1602 | 1685 | ||
1603 | default: | 1686 | default: |
1604 | printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); | 1687 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr); |
1605 | return 1; | 1688 | return 1; |
1606 | } | 1689 | } |
1607 | return 0; | 1690 | return 0; |
@@ -1615,44 +1698,24 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common); | |||
1615 | */ | 1698 | */ |
1616 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1699 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
1617 | { | 1700 | { |
1618 | return kvm_arch_ops->set_msr(vcpu, msr_index, data); | 1701 | return kvm_x86_ops->set_msr(vcpu, msr_index, data); |
1619 | } | 1702 | } |
1620 | 1703 | ||
1621 | void kvm_resched(struct kvm_vcpu *vcpu) | 1704 | void kvm_resched(struct kvm_vcpu *vcpu) |
1622 | { | 1705 | { |
1623 | if (!need_resched()) | 1706 | if (!need_resched()) |
1624 | return; | 1707 | return; |
1625 | vcpu_put(vcpu); | ||
1626 | cond_resched(); | 1708 | cond_resched(); |
1627 | vcpu_load(vcpu); | ||
1628 | } | 1709 | } |
1629 | EXPORT_SYMBOL_GPL(kvm_resched); | 1710 | EXPORT_SYMBOL_GPL(kvm_resched); |
1630 | 1711 | ||
1631 | void load_msrs(struct vmx_msr_entry *e, int n) | ||
1632 | { | ||
1633 | int i; | ||
1634 | |||
1635 | for (i = 0; i < n; ++i) | ||
1636 | wrmsrl(e[i].index, e[i].data); | ||
1637 | } | ||
1638 | EXPORT_SYMBOL_GPL(load_msrs); | ||
1639 | |||
1640 | void save_msrs(struct vmx_msr_entry *e, int n) | ||
1641 | { | ||
1642 | int i; | ||
1643 | |||
1644 | for (i = 0; i < n; ++i) | ||
1645 | rdmsrl(e[i].index, e[i].data); | ||
1646 | } | ||
1647 | EXPORT_SYMBOL_GPL(save_msrs); | ||
1648 | |||
1649 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 1712 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) |
1650 | { | 1713 | { |
1651 | int i; | 1714 | int i; |
1652 | u32 function; | 1715 | u32 function; |
1653 | struct kvm_cpuid_entry *e, *best; | 1716 | struct kvm_cpuid_entry *e, *best; |
1654 | 1717 | ||
1655 | kvm_arch_ops->cache_regs(vcpu); | 1718 | kvm_x86_ops->cache_regs(vcpu); |
1656 | function = vcpu->regs[VCPU_REGS_RAX]; | 1719 | function = vcpu->regs[VCPU_REGS_RAX]; |
1657 | vcpu->regs[VCPU_REGS_RAX] = 0; | 1720 | vcpu->regs[VCPU_REGS_RAX] = 0; |
1658 | vcpu->regs[VCPU_REGS_RBX] = 0; | 1721 | vcpu->regs[VCPU_REGS_RBX] = 0; |
@@ -1678,8 +1741,8 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
1678 | vcpu->regs[VCPU_REGS_RCX] = best->ecx; | 1741 | vcpu->regs[VCPU_REGS_RCX] = best->ecx; |
1679 | vcpu->regs[VCPU_REGS_RDX] = best->edx; | 1742 | vcpu->regs[VCPU_REGS_RDX] = best->edx; |
1680 | } | 1743 | } |
1681 | kvm_arch_ops->decache_regs(vcpu); | 1744 | kvm_x86_ops->decache_regs(vcpu); |
1682 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1745 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
1683 | } | 1746 | } |
1684 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 1747 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
1685 | 1748 | ||
@@ -1690,11 +1753,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
1690 | unsigned bytes; | 1753 | unsigned bytes; |
1691 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; | 1754 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; |
1692 | 1755 | ||
1693 | kvm_arch_ops->vcpu_put(vcpu); | ||
1694 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, | 1756 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, |
1695 | PAGE_KERNEL); | 1757 | PAGE_KERNEL); |
1696 | if (!q) { | 1758 | if (!q) { |
1697 | kvm_arch_ops->vcpu_load(vcpu); | ||
1698 | free_pio_guest_pages(vcpu); | 1759 | free_pio_guest_pages(vcpu); |
1699 | return -ENOMEM; | 1760 | return -ENOMEM; |
1700 | } | 1761 | } |
@@ -1706,7 +1767,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
1706 | memcpy(p, q, bytes); | 1767 | memcpy(p, q, bytes); |
1707 | q -= vcpu->pio.guest_page_offset; | 1768 | q -= vcpu->pio.guest_page_offset; |
1708 | vunmap(q); | 1769 | vunmap(q); |
1709 | kvm_arch_ops->vcpu_load(vcpu); | ||
1710 | free_pio_guest_pages(vcpu); | 1770 | free_pio_guest_pages(vcpu); |
1711 | return 0; | 1771 | return 0; |
1712 | } | 1772 | } |
@@ -1717,7 +1777,7 @@ static int complete_pio(struct kvm_vcpu *vcpu) | |||
1717 | long delta; | 1777 | long delta; |
1718 | int r; | 1778 | int r; |
1719 | 1779 | ||
1720 | kvm_arch_ops->cache_regs(vcpu); | 1780 | kvm_x86_ops->cache_regs(vcpu); |
1721 | 1781 | ||
1722 | if (!io->string) { | 1782 | if (!io->string) { |
1723 | if (io->in) | 1783 | if (io->in) |
@@ -1727,7 +1787,7 @@ static int complete_pio(struct kvm_vcpu *vcpu) | |||
1727 | if (io->in) { | 1787 | if (io->in) { |
1728 | r = pio_copy_data(vcpu); | 1788 | r = pio_copy_data(vcpu); |
1729 | if (r) { | 1789 | if (r) { |
1730 | kvm_arch_ops->cache_regs(vcpu); | 1790 | kvm_x86_ops->cache_regs(vcpu); |
1731 | return r; | 1791 | return r; |
1732 | } | 1792 | } |
1733 | } | 1793 | } |
@@ -1750,79 +1810,109 @@ static int complete_pio(struct kvm_vcpu *vcpu) | |||
1750 | vcpu->regs[VCPU_REGS_RSI] += delta; | 1810 | vcpu->regs[VCPU_REGS_RSI] += delta; |
1751 | } | 1811 | } |
1752 | 1812 | ||
1753 | kvm_arch_ops->decache_regs(vcpu); | 1813 | kvm_x86_ops->decache_regs(vcpu); |
1754 | 1814 | ||
1755 | io->count -= io->cur_count; | 1815 | io->count -= io->cur_count; |
1756 | io->cur_count = 0; | 1816 | io->cur_count = 0; |
1757 | 1817 | ||
1758 | if (!io->count) | ||
1759 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1760 | return 0; | 1818 | return 0; |
1761 | } | 1819 | } |
1762 | 1820 | ||
1763 | void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) | 1821 | static void kernel_pio(struct kvm_io_device *pio_dev, |
1822 | struct kvm_vcpu *vcpu, | ||
1823 | void *pd) | ||
1764 | { | 1824 | { |
1765 | /* TODO: String I/O for in kernel device */ | 1825 | /* TODO: String I/O for in kernel device */ |
1766 | 1826 | ||
1827 | mutex_lock(&vcpu->kvm->lock); | ||
1767 | if (vcpu->pio.in) | 1828 | if (vcpu->pio.in) |
1768 | kvm_iodevice_read(pio_dev, vcpu->pio.port, | 1829 | kvm_iodevice_read(pio_dev, vcpu->pio.port, |
1769 | vcpu->pio.size, | 1830 | vcpu->pio.size, |
1770 | vcpu->pio_data); | 1831 | pd); |
1771 | else | 1832 | else |
1772 | kvm_iodevice_write(pio_dev, vcpu->pio.port, | 1833 | kvm_iodevice_write(pio_dev, vcpu->pio.port, |
1773 | vcpu->pio.size, | 1834 | vcpu->pio.size, |
1774 | vcpu->pio_data); | 1835 | pd); |
1836 | mutex_unlock(&vcpu->kvm->lock); | ||
1837 | } | ||
1838 | |||
1839 | static void pio_string_write(struct kvm_io_device *pio_dev, | ||
1840 | struct kvm_vcpu *vcpu) | ||
1841 | { | ||
1842 | struct kvm_pio_request *io = &vcpu->pio; | ||
1843 | void *pd = vcpu->pio_data; | ||
1844 | int i; | ||
1845 | |||
1846 | mutex_lock(&vcpu->kvm->lock); | ||
1847 | for (i = 0; i < io->cur_count; i++) { | ||
1848 | kvm_iodevice_write(pio_dev, io->port, | ||
1849 | io->size, | ||
1850 | pd); | ||
1851 | pd += io->size; | ||
1852 | } | ||
1853 | mutex_unlock(&vcpu->kvm->lock); | ||
1775 | } | 1854 | } |
1776 | 1855 | ||
1777 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 1856 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, |
1778 | int size, unsigned long count, int string, int down, | 1857 | int size, unsigned port) |
1858 | { | ||
1859 | struct kvm_io_device *pio_dev; | ||
1860 | |||
1861 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
1862 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
1863 | vcpu->run->io.size = vcpu->pio.size = size; | ||
1864 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
1865 | vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1; | ||
1866 | vcpu->run->io.port = vcpu->pio.port = port; | ||
1867 | vcpu->pio.in = in; | ||
1868 | vcpu->pio.string = 0; | ||
1869 | vcpu->pio.down = 0; | ||
1870 | vcpu->pio.guest_page_offset = 0; | ||
1871 | vcpu->pio.rep = 0; | ||
1872 | |||
1873 | kvm_x86_ops->cache_regs(vcpu); | ||
1874 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
1875 | kvm_x86_ops->decache_regs(vcpu); | ||
1876 | |||
1877 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
1878 | |||
1879 | pio_dev = vcpu_find_pio_dev(vcpu, port); | ||
1880 | if (pio_dev) { | ||
1881 | kernel_pio(pio_dev, vcpu, vcpu->pio_data); | ||
1882 | complete_pio(vcpu); | ||
1883 | return 1; | ||
1884 | } | ||
1885 | return 0; | ||
1886 | } | ||
1887 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | ||
1888 | |||
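
When no in-kernel device claims the port, kvm_emulate_pio() returns 0 and the exit reaches userspace with the data staged in the shared page at run->io.data_offset (the KVM_PIO_PAGE_OFFSET page of the vcpu mmap). A hedged userspace sketch of completing such an exit; the device-model helpers are hypothetical:

uint8_t *data = (uint8_t *)run + run->io.data_offset;
uint32_t i;

for (i = 0; i < run->io.count; i++, data += run->io.size) {
	if (run->io.direction == KVM_EXIT_IO_IN)
		device_port_read(run->io.port, data, run->io.size);	/* hypothetical */
	else
		device_port_write(run->io.port, data, run->io.size);	/* hypothetical */
}
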
1889 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||
1890 | int size, unsigned long count, int down, | ||
1779 | gva_t address, int rep, unsigned port) | 1891 | gva_t address, int rep, unsigned port) |
1780 | { | 1892 | { |
1781 | unsigned now, in_page; | 1893 | unsigned now, in_page; |
1782 | int i; | 1894 | int i, ret = 0; |
1783 | int nr_pages = 1; | 1895 | int nr_pages = 1; |
1784 | struct page *page; | 1896 | struct page *page; |
1785 | struct kvm_io_device *pio_dev; | 1897 | struct kvm_io_device *pio_dev; |
1786 | 1898 | ||
1787 | vcpu->run->exit_reason = KVM_EXIT_IO; | 1899 | vcpu->run->exit_reason = KVM_EXIT_IO; |
1788 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 1900 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
1789 | vcpu->run->io.size = size; | 1901 | vcpu->run->io.size = vcpu->pio.size = size; |
1790 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | 1902 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; |
1791 | vcpu->run->io.count = count; | 1903 | vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count; |
1792 | vcpu->run->io.port = port; | 1904 | vcpu->run->io.port = vcpu->pio.port = port; |
1793 | vcpu->pio.count = count; | ||
1794 | vcpu->pio.cur_count = count; | ||
1795 | vcpu->pio.size = size; | ||
1796 | vcpu->pio.in = in; | 1905 | vcpu->pio.in = in; |
1797 | vcpu->pio.port = port; | 1906 | vcpu->pio.string = 1; |
1798 | vcpu->pio.string = string; | ||
1799 | vcpu->pio.down = down; | 1907 | vcpu->pio.down = down; |
1800 | vcpu->pio.guest_page_offset = offset_in_page(address); | 1908 | vcpu->pio.guest_page_offset = offset_in_page(address); |
1801 | vcpu->pio.rep = rep; | 1909 | vcpu->pio.rep = rep; |
1802 | 1910 | ||
1803 | pio_dev = vcpu_find_pio_dev(vcpu, port); | ||
1804 | if (!string) { | ||
1805 | kvm_arch_ops->cache_regs(vcpu); | ||
1806 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
1807 | kvm_arch_ops->decache_regs(vcpu); | ||
1808 | if (pio_dev) { | ||
1809 | kernel_pio(pio_dev, vcpu); | ||
1810 | complete_pio(vcpu); | ||
1811 | return 1; | ||
1812 | } | ||
1813 | return 0; | ||
1814 | } | ||
1815 | /* TODO: String I/O for in kernel device */ | ||
1816 | if (pio_dev) | ||
1817 | printk(KERN_ERR "kvm_setup_pio: no string io support\n"); | ||
1818 | |||
1819 | if (!count) { | 1911 | if (!count) { |
1820 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1912 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
1821 | return 1; | 1913 | return 1; |
1822 | } | 1914 | } |
1823 | 1915 | ||
1824 | now = min(count, PAGE_SIZE / size); | ||
1825 | |||
1826 | if (!down) | 1916 | if (!down) |
1827 | in_page = PAGE_SIZE - offset_in_page(address); | 1917 | in_page = PAGE_SIZE - offset_in_page(address); |
1828 | else | 1918 | else |
@@ -1841,20 +1931,23 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
1841 | /* | 1931 | /* |
1842 | * String I/O in reverse. Yuck. Kill the guest, fix later. | 1932 | * String I/O in reverse. Yuck. Kill the guest, fix later. |
1843 | */ | 1933 | */ |
1844 | printk(KERN_ERR "kvm: guest string pio down\n"); | 1934 | pr_unimpl(vcpu, "guest string pio down\n"); |
1845 | inject_gp(vcpu); | 1935 | inject_gp(vcpu); |
1846 | return 1; | 1936 | return 1; |
1847 | } | 1937 | } |
1848 | vcpu->run->io.count = now; | 1938 | vcpu->run->io.count = now; |
1849 | vcpu->pio.cur_count = now; | 1939 | vcpu->pio.cur_count = now; |
1850 | 1940 | ||
1941 | if (vcpu->pio.cur_count == vcpu->pio.count) | ||
1942 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
1943 | |||
1851 | for (i = 0; i < nr_pages; ++i) { | 1944 | for (i = 0; i < nr_pages; ++i) { |
1852 | spin_lock(&vcpu->kvm->lock); | 1945 | mutex_lock(&vcpu->kvm->lock); |
1853 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | 1946 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); |
1854 | if (page) | 1947 | if (page) |
1855 | get_page(page); | 1948 | get_page(page); |
1856 | vcpu->pio.guest_pages[i] = page; | 1949 | vcpu->pio.guest_pages[i] = page; |
1857 | spin_unlock(&vcpu->kvm->lock); | 1950 | mutex_unlock(&vcpu->kvm->lock); |
1858 | if (!page) { | 1951 | if (!page) { |
1859 | inject_gp(vcpu); | 1952 | inject_gp(vcpu); |
1860 | free_pio_guest_pages(vcpu); | 1953 | free_pio_guest_pages(vcpu); |
@@ -1862,11 +1955,145 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
1862 | } | 1955 | } |
1863 | } | 1956 | } |
1864 | 1957 | ||
1865 | if (!vcpu->pio.in) | 1958 | pio_dev = vcpu_find_pio_dev(vcpu, port); |
1866 | return pio_copy_data(vcpu); | 1959 | if (!vcpu->pio.in) { |
1867 | return 0; | 1960 | /* string PIO write */ |
1961 | ret = pio_copy_data(vcpu); | ||
1962 | if (ret >= 0 && pio_dev) { | ||
1963 | pio_string_write(pio_dev, vcpu); | ||
1964 | complete_pio(vcpu); | ||
1965 | if (vcpu->pio.count == 0) | ||
1966 | ret = 1; | ||
1967 | } | ||
1968 | } else if (pio_dev) | ||
1969 | pr_unimpl(vcpu, "no string pio read support yet, " | ||
1970 | "port %x size %d count %ld\n", | ||
1971 | port, size, count); | ||
1972 | |||
1973 | return ret; | ||
1974 | } | ||
1975 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | ||
1976 | |||
1977 | /* | ||
1978 | * Check if userspace requested an interrupt window, and that the | ||
1979 | * interrupt window is open. | ||
1980 | * | ||
1981 | * No need to exit to userspace if we already have an interrupt queued. | ||
1982 | */ | ||
1983 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | ||
1984 | struct kvm_run *kvm_run) | ||
1985 | { | ||
1986 | return (!vcpu->irq_summary && | ||
1987 | kvm_run->request_interrupt_window && | ||
1988 | vcpu->interrupt_window_open && | ||
1989 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); | ||
1990 | } | ||
1991 | |||
1992 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | ||
1993 | struct kvm_run *kvm_run) | ||
1994 | { | ||
1995 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | ||
1996 | kvm_run->cr8 = get_cr8(vcpu); | ||
1997 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | ||
1998 | if (irqchip_in_kernel(vcpu->kvm)) | ||
1999 | kvm_run->ready_for_interrupt_injection = 1; | ||
2000 | else | ||
2001 | kvm_run->ready_for_interrupt_injection = | ||
2002 | (vcpu->interrupt_window_open && | ||
2003 | vcpu->irq_summary == 0); | ||
2004 | } | ||
2005 | |||
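
dm_request_for_irq_injection() and post_kvm_run_save() only matter when the irqchip is emulated in userspace. A hedged sketch of how a device model might use these kvm_run fields around KVM_RUN; the helper names are invented, and the interrupt ioctl corresponds to kvm_vcpu_ioctl_interrupt() further down:

run->request_interrupt_window = userspace_pic_has_pending_irq();	/* hypothetical */
ioctl(vcpu_fd, KVM_RUN, 0);
if (run->ready_for_interrupt_injection && run->if_flag) {
	struct kvm_interrupt intr = { .irq = userspace_pic_pop_irq() };	/* hypothetical */
	ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
}
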
2006 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2007 | { | ||
2008 | int r; | ||
2009 | |||
2010 | if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { | ||
2011 | printk(KERN_DEBUG "vcpu %d received sipi with vector 0x%x\n", | ||
2012 | vcpu->vcpu_id, vcpu->sipi_vector); | ||
2013 | kvm_lapic_reset(vcpu); | ||
2014 | kvm_x86_ops->vcpu_reset(vcpu); | ||
2015 | vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; | ||
2016 | } | ||
2017 | |||
2018 | preempted: | ||
2019 | if (vcpu->guest_debug.enabled) | ||
2020 | kvm_x86_ops->guest_debug_pre(vcpu); | ||
2021 | |||
2022 | again: | ||
2023 | r = kvm_mmu_reload(vcpu); | ||
2024 | if (unlikely(r)) | ||
2025 | goto out; | ||
2026 | |||
2027 | preempt_disable(); | ||
2028 | |||
2029 | kvm_x86_ops->prepare_guest_switch(vcpu); | ||
2030 | kvm_load_guest_fpu(vcpu); | ||
2031 | |||
2032 | local_irq_disable(); | ||
2033 | |||
2034 | if (signal_pending(current)) { | ||
2035 | local_irq_enable(); | ||
2036 | preempt_enable(); | ||
2037 | r = -EINTR; | ||
2038 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
2039 | ++vcpu->stat.signal_exits; | ||
2040 | goto out; | ||
2041 | } | ||
2042 | |||
2043 | if (irqchip_in_kernel(vcpu->kvm)) | ||
2044 | kvm_x86_ops->inject_pending_irq(vcpu); | ||
2045 | else if (!vcpu->mmio_read_completed) | ||
2046 | kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); | ||
2047 | |||
2048 | vcpu->guest_mode = 1; | ||
2049 | |||
2050 | if (vcpu->requests) | ||
2051 | if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) | ||
2052 | kvm_x86_ops->tlb_flush(vcpu); | ||
2053 | |||
2054 | kvm_x86_ops->run(vcpu, kvm_run); | ||
2055 | |||
2056 | vcpu->guest_mode = 0; | ||
2057 | local_irq_enable(); | ||
2058 | |||
2059 | ++vcpu->stat.exits; | ||
2060 | |||
2061 | preempt_enable(); | ||
2062 | |||
2063 | /* | ||
2064 | * Profile KVM exit RIPs: | ||
2065 | */ | ||
2066 | if (unlikely(prof_on == KVM_PROFILING)) { | ||
2067 | kvm_x86_ops->cache_regs(vcpu); | ||
2068 | profile_hit(KVM_PROFILING, (void *)vcpu->rip); | ||
2069 | } | ||
2070 | |||
2071 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | ||
2072 | |||
2073 | if (r > 0) { | ||
2074 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | ||
2075 | r = -EINTR; | ||
2076 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
2077 | ++vcpu->stat.request_irq_exits; | ||
2078 | goto out; | ||
2079 | } | ||
2080 | if (!need_resched()) { | ||
2081 | ++vcpu->stat.light_exits; | ||
2082 | goto again; | ||
2083 | } | ||
2084 | } | ||
2085 | |||
2086 | out: | ||
2087 | if (r > 0) { | ||
2088 | kvm_resched(vcpu); | ||
2089 | goto preempted; | ||
2090 | } | ||
2091 | |||
2092 | post_kvm_run_save(vcpu, kvm_run); | ||
2093 | |||
2094 | return r; | ||
1868 | } | 2095 | } |
1869 | EXPORT_SYMBOL_GPL(kvm_setup_pio); | 2096 | |
1870 | 2097 | ||
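
__vcpu_run() above keeps looping in the kernel across light exits; everything it cannot handle surfaces through kvm_run. A hedged sketch of the matching userspace run loop (the exit reasons all appear in this patch; fd names and device helpers are hypothetical):

for (;;) {
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
		break;
	switch (run->exit_reason) {
	case KVM_EXIT_INTR:		/* __vcpu_run saw a pending signal */
		continue;
	case KVM_EXIT_HLT:		/* only seen without the in-kernel irqchip */
		wait_for_guest_interrupt();	/* hypothetical */
		continue;
	case KVM_EXIT_IO:
	case KVM_EXIT_MMIO:
	case KVM_EXIT_HYPERCALL:
		service_exit(run);		/* hypothetical */
		continue;
	default:
		break;
	}
	break;
}
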
1871 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2098 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1872 | { | 2099 | { |
@@ -1875,11 +2102,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1875 | 2102 | ||
1876 | vcpu_load(vcpu); | 2103 | vcpu_load(vcpu); |
1877 | 2104 | ||
2105 | if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { | ||
2106 | kvm_vcpu_block(vcpu); | ||
2107 | vcpu_put(vcpu); | ||
2108 | return -EAGAIN; | ||
2109 | } | ||
2110 | |||
1878 | if (vcpu->sigset_active) | 2111 | if (vcpu->sigset_active) |
1879 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 2112 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
1880 | 2113 | ||
1881 | /* re-sync apic's tpr */ | 2114 | /* re-sync apic's tpr */ |
1882 | vcpu->cr8 = kvm_run->cr8; | 2115 | if (!irqchip_in_kernel(vcpu->kvm)) |
2116 | set_cr8(vcpu, kvm_run->cr8); | ||
1883 | 2117 | ||
1884 | if (vcpu->pio.cur_count) { | 2118 | if (vcpu->pio.cur_count) { |
1885 | r = complete_pio(vcpu); | 2119 | r = complete_pio(vcpu); |
@@ -1897,19 +2131,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1897 | /* | 2131 | /* |
1898 | * Read-modify-write. Back to userspace. | 2132 | * Read-modify-write. Back to userspace. |
1899 | */ | 2133 | */ |
1900 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
1901 | r = 0; | 2134 | r = 0; |
1902 | goto out; | 2135 | goto out; |
1903 | } | 2136 | } |
1904 | } | 2137 | } |
1905 | 2138 | ||
1906 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { | 2139 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { |
1907 | kvm_arch_ops->cache_regs(vcpu); | 2140 | kvm_x86_ops->cache_regs(vcpu); |
1908 | vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; | 2141 | vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; |
1909 | kvm_arch_ops->decache_regs(vcpu); | 2142 | kvm_x86_ops->decache_regs(vcpu); |
1910 | } | 2143 | } |
1911 | 2144 | ||
1912 | r = kvm_arch_ops->run(vcpu, kvm_run); | 2145 | r = __vcpu_run(vcpu, kvm_run); |
1913 | 2146 | ||
1914 | out: | 2147 | out: |
1915 | if (vcpu->sigset_active) | 2148 | if (vcpu->sigset_active) |
@@ -1924,7 +2157,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, | |||
1924 | { | 2157 | { |
1925 | vcpu_load(vcpu); | 2158 | vcpu_load(vcpu); |
1926 | 2159 | ||
1927 | kvm_arch_ops->cache_regs(vcpu); | 2160 | kvm_x86_ops->cache_regs(vcpu); |
1928 | 2161 | ||
1929 | regs->rax = vcpu->regs[VCPU_REGS_RAX]; | 2162 | regs->rax = vcpu->regs[VCPU_REGS_RAX]; |
1930 | regs->rbx = vcpu->regs[VCPU_REGS_RBX]; | 2163 | regs->rbx = vcpu->regs[VCPU_REGS_RBX]; |
@@ -1946,7 +2179,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, | |||
1946 | #endif | 2179 | #endif |
1947 | 2180 | ||
1948 | regs->rip = vcpu->rip; | 2181 | regs->rip = vcpu->rip; |
1949 | regs->rflags = kvm_arch_ops->get_rflags(vcpu); | 2182 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); |
1950 | 2183 | ||
1951 | /* | 2184 | /* |
1952 | * Don't leak debug flags in case they were set for guest debugging | 2185 | * Don't leak debug flags in case they were set for guest debugging |
@@ -1984,9 +2217,9 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, | |||
1984 | #endif | 2217 | #endif |
1985 | 2218 | ||
1986 | vcpu->rip = regs->rip; | 2219 | vcpu->rip = regs->rip; |
1987 | kvm_arch_ops->set_rflags(vcpu, regs->rflags); | 2220 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); |
1988 | 2221 | ||
1989 | kvm_arch_ops->decache_regs(vcpu); | 2222 | kvm_x86_ops->decache_regs(vcpu); |
1990 | 2223 | ||
1991 | vcpu_put(vcpu); | 2224 | vcpu_put(vcpu); |
1992 | 2225 | ||
@@ -1996,13 +2229,14 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, | |||
1996 | static void get_segment(struct kvm_vcpu *vcpu, | 2229 | static void get_segment(struct kvm_vcpu *vcpu, |
1997 | struct kvm_segment *var, int seg) | 2230 | struct kvm_segment *var, int seg) |
1998 | { | 2231 | { |
1999 | return kvm_arch_ops->get_segment(vcpu, var, seg); | 2232 | return kvm_x86_ops->get_segment(vcpu, var, seg); |
2000 | } | 2233 | } |
2001 | 2234 | ||
2002 | static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 2235 | static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
2003 | struct kvm_sregs *sregs) | 2236 | struct kvm_sregs *sregs) |
2004 | { | 2237 | { |
2005 | struct descriptor_table dt; | 2238 | struct descriptor_table dt; |
2239 | int pending_vec; | ||
2006 | 2240 | ||
2007 | vcpu_load(vcpu); | 2241 | vcpu_load(vcpu); |
2008 | 2242 | ||
@@ -2016,24 +2250,31 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
2016 | get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | 2250 | get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); |
2017 | get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 2251 | get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
2018 | 2252 | ||
2019 | kvm_arch_ops->get_idt(vcpu, &dt); | 2253 | kvm_x86_ops->get_idt(vcpu, &dt); |
2020 | sregs->idt.limit = dt.limit; | 2254 | sregs->idt.limit = dt.limit; |
2021 | sregs->idt.base = dt.base; | 2255 | sregs->idt.base = dt.base; |
2022 | kvm_arch_ops->get_gdt(vcpu, &dt); | 2256 | kvm_x86_ops->get_gdt(vcpu, &dt); |
2023 | sregs->gdt.limit = dt.limit; | 2257 | sregs->gdt.limit = dt.limit; |
2024 | sregs->gdt.base = dt.base; | 2258 | sregs->gdt.base = dt.base; |
2025 | 2259 | ||
2026 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); | 2260 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
2027 | sregs->cr0 = vcpu->cr0; | 2261 | sregs->cr0 = vcpu->cr0; |
2028 | sregs->cr2 = vcpu->cr2; | 2262 | sregs->cr2 = vcpu->cr2; |
2029 | sregs->cr3 = vcpu->cr3; | 2263 | sregs->cr3 = vcpu->cr3; |
2030 | sregs->cr4 = vcpu->cr4; | 2264 | sregs->cr4 = vcpu->cr4; |
2031 | sregs->cr8 = vcpu->cr8; | 2265 | sregs->cr8 = get_cr8(vcpu); |
2032 | sregs->efer = vcpu->shadow_efer; | 2266 | sregs->efer = vcpu->shadow_efer; |
2033 | sregs->apic_base = vcpu->apic_base; | 2267 | sregs->apic_base = kvm_get_apic_base(vcpu); |
2034 | 2268 | ||
2035 | memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, | 2269 | if (irqchip_in_kernel(vcpu->kvm)) { |
2036 | sizeof sregs->interrupt_bitmap); | 2270 | memset(sregs->interrupt_bitmap, 0, |
2271 | sizeof sregs->interrupt_bitmap); | ||
2272 | pending_vec = kvm_x86_ops->get_irq(vcpu); | ||
2273 | if (pending_vec >= 0) | ||
2274 | set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap); | ||
2275 | } else | ||
2276 | memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, | ||
2277 | sizeof sregs->interrupt_bitmap); | ||
2037 | 2278 | ||
2038 | vcpu_put(vcpu); | 2279 | vcpu_put(vcpu); |
2039 | 2280 | ||
@@ -2043,56 +2284,69 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
2043 | static void set_segment(struct kvm_vcpu *vcpu, | 2284 | static void set_segment(struct kvm_vcpu *vcpu, |
2044 | struct kvm_segment *var, int seg) | 2285 | struct kvm_segment *var, int seg) |
2045 | { | 2286 | { |
2046 | return kvm_arch_ops->set_segment(vcpu, var, seg); | 2287 | return kvm_x86_ops->set_segment(vcpu, var, seg); |
2047 | } | 2288 | } |
2048 | 2289 | ||
2049 | static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 2290 | static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
2050 | struct kvm_sregs *sregs) | 2291 | struct kvm_sregs *sregs) |
2051 | { | 2292 | { |
2052 | int mmu_reset_needed = 0; | 2293 | int mmu_reset_needed = 0; |
2053 | int i; | 2294 | int i, pending_vec, max_bits; |
2054 | struct descriptor_table dt; | 2295 | struct descriptor_table dt; |
2055 | 2296 | ||
2056 | vcpu_load(vcpu); | 2297 | vcpu_load(vcpu); |
2057 | 2298 | ||
2058 | dt.limit = sregs->idt.limit; | 2299 | dt.limit = sregs->idt.limit; |
2059 | dt.base = sregs->idt.base; | 2300 | dt.base = sregs->idt.base; |
2060 | kvm_arch_ops->set_idt(vcpu, &dt); | 2301 | kvm_x86_ops->set_idt(vcpu, &dt); |
2061 | dt.limit = sregs->gdt.limit; | 2302 | dt.limit = sregs->gdt.limit; |
2062 | dt.base = sregs->gdt.base; | 2303 | dt.base = sregs->gdt.base; |
2063 | kvm_arch_ops->set_gdt(vcpu, &dt); | 2304 | kvm_x86_ops->set_gdt(vcpu, &dt); |
2064 | 2305 | ||
2065 | vcpu->cr2 = sregs->cr2; | 2306 | vcpu->cr2 = sregs->cr2; |
2066 | mmu_reset_needed |= vcpu->cr3 != sregs->cr3; | 2307 | mmu_reset_needed |= vcpu->cr3 != sregs->cr3; |
2067 | vcpu->cr3 = sregs->cr3; | 2308 | vcpu->cr3 = sregs->cr3; |
2068 | 2309 | ||
2069 | vcpu->cr8 = sregs->cr8; | 2310 | set_cr8(vcpu, sregs->cr8); |
2070 | 2311 | ||
2071 | mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; | 2312 | mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; |
2072 | #ifdef CONFIG_X86_64 | 2313 | #ifdef CONFIG_X86_64 |
2073 | kvm_arch_ops->set_efer(vcpu, sregs->efer); | 2314 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
2074 | #endif | 2315 | #endif |
2075 | vcpu->apic_base = sregs->apic_base; | 2316 | kvm_set_apic_base(vcpu, sregs->apic_base); |
2076 | 2317 | ||
2077 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); | 2318 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
2078 | 2319 | ||
2079 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; | 2320 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; |
2080 | kvm_arch_ops->set_cr0(vcpu, sregs->cr0); | 2321 | vcpu->cr0 = sregs->cr0; |
2322 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | ||
2081 | 2323 | ||
2082 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; | 2324 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; |
2083 | kvm_arch_ops->set_cr4(vcpu, sregs->cr4); | 2325 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
2084 | if (!is_long_mode(vcpu) && is_pae(vcpu)) | 2326 | if (!is_long_mode(vcpu) && is_pae(vcpu)) |
2085 | load_pdptrs(vcpu, vcpu->cr3); | 2327 | load_pdptrs(vcpu, vcpu->cr3); |
2086 | 2328 | ||
2087 | if (mmu_reset_needed) | 2329 | if (mmu_reset_needed) |
2088 | kvm_mmu_reset_context(vcpu); | 2330 | kvm_mmu_reset_context(vcpu); |
2089 | 2331 | ||
2090 | memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, | 2332 | if (!irqchip_in_kernel(vcpu->kvm)) { |
2091 | sizeof vcpu->irq_pending); | 2333 | memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, |
2092 | vcpu->irq_summary = 0; | 2334 | sizeof vcpu->irq_pending); |
2093 | for (i = 0; i < NR_IRQ_WORDS; ++i) | 2335 | vcpu->irq_summary = 0; |
2094 | if (vcpu->irq_pending[i]) | 2336 | for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) |
2095 | __set_bit(i, &vcpu->irq_summary); | 2337 | if (vcpu->irq_pending[i]) |
2338 | __set_bit(i, &vcpu->irq_summary); | ||
2339 | } else { | ||
2340 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; | ||
2341 | pending_vec = find_first_bit( | ||
2342 | (const unsigned long *)sregs->interrupt_bitmap, | ||
2343 | max_bits); | ||
2344 | /* Only pending external irq is handled here */ | ||
2345 | if (pending_vec < max_bits) { | ||
2346 | kvm_x86_ops->set_irq(vcpu, pending_vec); | ||
2347 | printk("Set back pending irq %d\n", pending_vec); | ||
2348 | } | ||
2349 | } | ||
2096 | 2350 | ||
2097 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 2351 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
2098 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 2352 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
@@ -2109,6 +2363,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
2109 | return 0; | 2363 | return 0; |
2110 | } | 2364 | } |
2111 | 2365 | ||
2366 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | ||
2367 | { | ||
2368 | struct kvm_segment cs; | ||
2369 | |||
2370 | get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
2371 | *db = cs.db; | ||
2372 | *l = cs.l; | ||
2373 | } | ||
2374 | EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | ||
2375 | |||
2112 | /* | 2376 | /* |
2113 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 2377 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
2114 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 2378 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
@@ -2236,13 +2500,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
2236 | gpa_t gpa; | 2500 | gpa_t gpa; |
2237 | 2501 | ||
2238 | vcpu_load(vcpu); | 2502 | vcpu_load(vcpu); |
2239 | spin_lock(&vcpu->kvm->lock); | 2503 | mutex_lock(&vcpu->kvm->lock); |
2240 | gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); | 2504 | gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); |
2241 | tr->physical_address = gpa; | 2505 | tr->physical_address = gpa; |
2242 | tr->valid = gpa != UNMAPPED_GVA; | 2506 | tr->valid = gpa != UNMAPPED_GVA; |
2243 | tr->writeable = 1; | 2507 | tr->writeable = 1; |
2244 | tr->usermode = 0; | 2508 | tr->usermode = 0; |
2245 | spin_unlock(&vcpu->kvm->lock); | 2509 | mutex_unlock(&vcpu->kvm->lock); |
2246 | vcpu_put(vcpu); | 2510 | vcpu_put(vcpu); |
2247 | 2511 | ||
2248 | return 0; | 2512 | return 0; |
@@ -2253,6 +2517,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
2253 | { | 2517 | { |
2254 | if (irq->irq < 0 || irq->irq >= 256) | 2518 | if (irq->irq < 0 || irq->irq >= 256) |
2255 | return -EINVAL; | 2519 | return -EINVAL; |
2520 | if (irqchip_in_kernel(vcpu->kvm)) | ||
2521 | return -ENXIO; | ||
2256 | vcpu_load(vcpu); | 2522 | vcpu_load(vcpu); |
2257 | 2523 | ||
2258 | set_bit(irq->irq, vcpu->irq_pending); | 2524 | set_bit(irq->irq, vcpu->irq_pending); |
@@ -2270,7 +2536,7 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | |||
2270 | 2536 | ||
2271 | vcpu_load(vcpu); | 2537 | vcpu_load(vcpu); |
2272 | 2538 | ||
2273 | r = kvm_arch_ops->set_guest_debug(vcpu, dbg); | 2539 | r = kvm_x86_ops->set_guest_debug(vcpu, dbg); |
2274 | 2540 | ||
2275 | vcpu_put(vcpu); | 2541 | vcpu_put(vcpu); |
2276 | 2542 | ||
@@ -2285,7 +2551,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | |||
2285 | unsigned long pgoff; | 2551 | unsigned long pgoff; |
2286 | struct page *page; | 2552 | struct page *page; |
2287 | 2553 | ||
2288 | *type = VM_FAULT_MINOR; | ||
2289 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 2554 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
2290 | if (pgoff == 0) | 2555 | if (pgoff == 0) |
2291 | page = virt_to_page(vcpu->run); | 2556 | page = virt_to_page(vcpu->run); |
@@ -2294,6 +2559,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | |||
2294 | else | 2559 | else |
2295 | return NOPAGE_SIGBUS; | 2560 | return NOPAGE_SIGBUS; |
2296 | get_page(page); | 2561 | get_page(page); |
2562 | if (type != NULL) | ||
2563 | *type = VM_FAULT_MINOR; | ||
2564 | |||
2297 | return page; | 2565 | return page; |
2298 | } | 2566 | } |
2299 | 2567 | ||
@@ -2346,74 +2614,52 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
2346 | { | 2614 | { |
2347 | int r; | 2615 | int r; |
2348 | struct kvm_vcpu *vcpu; | 2616 | struct kvm_vcpu *vcpu; |
2349 | struct page *page; | ||
2350 | 2617 | ||
2351 | r = -EINVAL; | ||
2352 | if (!valid_vcpu(n)) | 2618 | if (!valid_vcpu(n)) |
2353 | goto out; | 2619 | return -EINVAL; |
2354 | |||
2355 | vcpu = &kvm->vcpus[n]; | ||
2356 | |||
2357 | mutex_lock(&vcpu->mutex); | ||
2358 | |||
2359 | if (vcpu->vmcs) { | ||
2360 | mutex_unlock(&vcpu->mutex); | ||
2361 | return -EEXIST; | ||
2362 | } | ||
2363 | |||
2364 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2365 | r = -ENOMEM; | ||
2366 | if (!page) | ||
2367 | goto out_unlock; | ||
2368 | vcpu->run = page_address(page); | ||
2369 | |||
2370 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2371 | r = -ENOMEM; | ||
2372 | if (!page) | ||
2373 | goto out_free_run; | ||
2374 | vcpu->pio_data = page_address(page); | ||
2375 | 2620 | ||
2376 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, | 2621 | vcpu = kvm_x86_ops->vcpu_create(kvm, n); |
2377 | FX_IMAGE_ALIGN); | 2622 | if (IS_ERR(vcpu)) |
2378 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; | 2623 | return PTR_ERR(vcpu); |
2379 | vcpu->cr0 = 0x10; | ||
2380 | 2624 | ||
2381 | r = kvm_arch_ops->vcpu_create(vcpu); | 2625 | preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); |
2382 | if (r < 0) | ||
2383 | goto out_free_vcpus; | ||
2384 | 2626 | ||
2385 | r = kvm_mmu_create(vcpu); | 2627 | /* We do fxsave: this must be aligned. */ |
2386 | if (r < 0) | 2628 | BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF); |
2387 | goto out_free_vcpus; | ||
2388 | 2629 | ||
2389 | kvm_arch_ops->vcpu_load(vcpu); | 2630 | vcpu_load(vcpu); |
2390 | r = kvm_mmu_setup(vcpu); | 2631 | r = kvm_mmu_setup(vcpu); |
2391 | if (r >= 0) | ||
2392 | r = kvm_arch_ops->vcpu_setup(vcpu); | ||
2393 | vcpu_put(vcpu); | 2632 | vcpu_put(vcpu); |
2394 | |||
2395 | if (r < 0) | 2633 | if (r < 0) |
2396 | goto out_free_vcpus; | 2634 | goto free_vcpu; |
2397 | 2635 | ||
2636 | mutex_lock(&kvm->lock); | ||
2637 | if (kvm->vcpus[n]) { | ||
2638 | r = -EEXIST; | ||
2639 | mutex_unlock(&kvm->lock); | ||
2640 | goto mmu_unload; | ||
2641 | } | ||
2642 | kvm->vcpus[n] = vcpu; | ||
2643 | mutex_unlock(&kvm->lock); | ||
2644 | |||
2645 | /* Now it's all set up, let userspace reach it */ | ||
2398 | r = create_vcpu_fd(vcpu); | 2646 | r = create_vcpu_fd(vcpu); |
2399 | if (r < 0) | 2647 | if (r < 0) |
2400 | goto out_free_vcpus; | 2648 | goto unlink; |
2649 | return r; | ||
2401 | 2650 | ||
2402 | spin_lock(&kvm_lock); | 2651 | unlink: |
2403 | if (n >= kvm->nvcpus) | 2652 | mutex_lock(&kvm->lock); |
2404 | kvm->nvcpus = n + 1; | 2653 | kvm->vcpus[n] = NULL; |
2405 | spin_unlock(&kvm_lock); | 2654 | mutex_unlock(&kvm->lock); |
2406 | 2655 | ||
2407 | return r; | 2656 | mmu_unload: |
2657 | vcpu_load(vcpu); | ||
2658 | kvm_mmu_unload(vcpu); | ||
2659 | vcpu_put(vcpu); | ||
2408 | 2660 | ||
2409 | out_free_vcpus: | 2661 | free_vcpu: |
2410 | kvm_free_vcpu(vcpu); | 2662 | kvm_x86_ops->vcpu_free(vcpu); |
2411 | out_free_run: | ||
2412 | free_page((unsigned long)vcpu->run); | ||
2413 | vcpu->run = NULL; | ||
2414 | out_unlock: | ||
2415 | mutex_unlock(&vcpu->mutex); | ||
2416 | out: | ||
2417 | return r; | 2663 | return r; |
2418 | } | 2664 | } |
2419 | 2665 | ||
@@ -2493,7 +2739,7 @@ struct fxsave { | |||
2493 | 2739 | ||
2494 | static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 2740 | static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
2495 | { | 2741 | { |
2496 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | 2742 | struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; |
2497 | 2743 | ||
2498 | vcpu_load(vcpu); | 2744 | vcpu_load(vcpu); |
2499 | 2745 | ||
@@ -2513,7 +2759,7 @@ static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
2513 | 2759 | ||
2514 | static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 2760 | static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
2515 | { | 2761 | { |
2516 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | 2762 | struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; |
2517 | 2763 | ||
2518 | vcpu_load(vcpu); | 2764 | vcpu_load(vcpu); |
2519 | 2765 | ||
@@ -2531,6 +2777,27 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
2531 | return 0; | 2777 | return 0; |
2532 | } | 2778 | } |
2533 | 2779 | ||
2780 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | ||
2781 | struct kvm_lapic_state *s) | ||
2782 | { | ||
2783 | vcpu_load(vcpu); | ||
2784 | memcpy(s->regs, vcpu->apic->regs, sizeof *s); | ||
2785 | vcpu_put(vcpu); | ||
2786 | |||
2787 | return 0; | ||
2788 | } | ||
2789 | |||
2790 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | ||
2791 | struct kvm_lapic_state *s) | ||
2792 | { | ||
2793 | vcpu_load(vcpu); | ||
2794 | memcpy(vcpu->apic->regs, s->regs, sizeof *s); | ||
2795 | kvm_apic_post_state_restore(vcpu); | ||
2796 | vcpu_put(vcpu); | ||
2797 | |||
2798 | return 0; | ||
2799 | } | ||
2800 | |||
2534 | static long kvm_vcpu_ioctl(struct file *filp, | 2801 | static long kvm_vcpu_ioctl(struct file *filp, |
2535 | unsigned int ioctl, unsigned long arg) | 2802 | unsigned int ioctl, unsigned long arg) |
2536 | { | 2803 | { |
@@ -2700,6 +2967,31 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
2700 | r = 0; | 2967 | r = 0; |
2701 | break; | 2968 | break; |
2702 | } | 2969 | } |
2970 | case KVM_GET_LAPIC: { | ||
2971 | struct kvm_lapic_state lapic; | ||
2972 | |||
2973 | memset(&lapic, 0, sizeof lapic); | ||
2974 | r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); | ||
2975 | if (r) | ||
2976 | goto out; | ||
2977 | r = -EFAULT; | ||
2978 | if (copy_to_user(argp, &lapic, sizeof lapic)) | ||
2979 | goto out; | ||
2980 | r = 0; | ||
2981 | break; | ||
2982 | } | ||
2983 | case KVM_SET_LAPIC: { | ||
2984 | struct kvm_lapic_state lapic; | ||
2985 | |||
2986 | r = -EFAULT; | ||
2987 | if (copy_from_user(&lapic, argp, sizeof lapic)) | ||
2988 | goto out; | ||
2989 | r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic); | ||
2990 | if (r) | ||
2991 | goto out; | ||
2992 | r = 0; | ||
2993 | break; | ||
2994 | } | ||
2703 | default: | 2995 | default: |
2704 | ; | 2996 | ; |
2705 | } | 2997 | } |
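
The new KVM_GET_LAPIC/KVM_SET_LAPIC vcpu ioctls expose the in-kernel local APIC registers, which a device model needs for save/restore. A hedged userspace sketch; vcpu_fd and the serialization step are placeholders:

struct kvm_lapic_state lapic;

if (ioctl(vcpu_fd, KVM_GET_LAPIC, &lapic) == 0)
	save_blob("lapic", lapic.regs, sizeof lapic.regs);	/* hypothetical */
/* ... later, on the restored vcpu ... */
ioctl(vcpu_fd, KVM_SET_LAPIC, &lapic);
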
@@ -2753,6 +3045,75 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2753 | goto out; | 3045 | goto out; |
2754 | break; | 3046 | break; |
2755 | } | 3047 | } |
3048 | case KVM_CREATE_IRQCHIP: | ||
3049 | r = -ENOMEM; | ||
3050 | kvm->vpic = kvm_create_pic(kvm); | ||
3051 | if (kvm->vpic) { | ||
3052 | r = kvm_ioapic_init(kvm); | ||
3053 | if (r) { | ||
3054 | kfree(kvm->vpic); | ||
3055 | kvm->vpic = NULL; | ||
3056 | goto out; | ||
3057 | } | ||
3058 | } | ||
3059 | else | ||
3060 | goto out; | ||
3061 | break; | ||
3062 | case KVM_IRQ_LINE: { | ||
3063 | struct kvm_irq_level irq_event; | ||
3064 | |||
3065 | r = -EFAULT; | ||
3066 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
3067 | goto out; | ||
3068 | if (irqchip_in_kernel(kvm)) { | ||
3069 | mutex_lock(&kvm->lock); | ||
3070 | if (irq_event.irq < 16) | ||
3071 | kvm_pic_set_irq(pic_irqchip(kvm), | ||
3072 | irq_event.irq, | ||
3073 | irq_event.level); | ||
3074 | kvm_ioapic_set_irq(kvm->vioapic, | ||
3075 | irq_event.irq, | ||
3076 | irq_event.level); | ||
3077 | mutex_unlock(&kvm->lock); | ||
3078 | r = 0; | ||
3079 | } | ||
3080 | break; | ||
3081 | } | ||
3082 | case KVM_GET_IRQCHIP: { | ||
3083 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | ||
3084 | struct kvm_irqchip chip; | ||
3085 | |||
3086 | r = -EFAULT; | ||
3087 | if (copy_from_user(&chip, argp, sizeof chip)) | ||
3088 | goto out; | ||
3089 | r = -ENXIO; | ||
3090 | if (!irqchip_in_kernel(kvm)) | ||
3091 | goto out; | ||
3092 | r = kvm_vm_ioctl_get_irqchip(kvm, &chip); | ||
3093 | if (r) | ||
3094 | goto out; | ||
3095 | r = -EFAULT; | ||
3096 | if (copy_to_user(argp, &chip, sizeof chip)) | ||
3097 | goto out; | ||
3098 | r = 0; | ||
3099 | break; | ||
3100 | } | ||
3101 | case KVM_SET_IRQCHIP: { | ||
3102 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | ||
3103 | struct kvm_irqchip chip; | ||
3104 | |||
3105 | r = -EFAULT; | ||
3106 | if (copy_from_user(&chip, argp, sizeof chip)) | ||
3107 | goto out; | ||
3108 | r = -ENXIO; | ||
3109 | if (!irqchip_in_kernel(kvm)) | ||
3110 | goto out; | ||
3111 | r = kvm_vm_ioctl_set_irqchip(kvm, &chip); | ||
3112 | if (r) | ||
3113 | goto out; | ||
3114 | r = 0; | ||
3115 | break; | ||
3116 | } | ||
2756 | default: | 3117 | default: |
2757 | ; | 3118 | ; |
2758 | } | 3119 | } |
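
Taken together with the KVM_CAP_IRQCHIP/KVM_CAP_HLT capability reporting added to kvm_dev_ioctl() below, a userspace VMM can now opt into the in-kernel irqchip and feed it interrupt lines. A hedged sketch; kvm_fd and vm_fd are hypothetical descriptor names for /dev/kvm and the VM:

if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0)
	ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);

/* pulse ISA IRQ 4; lines below 16 reach the PIC as well as the IOAPIC */
struct kvm_irq_level irq = { .irq = 4, .level = 1 };
ioctl(vm_fd, KVM_IRQ_LINE, &irq);
irq.level = 0;
ioctl(vm_fd, KVM_IRQ_LINE, &irq);
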
@@ -2768,12 +3129,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, | |||
2768 | unsigned long pgoff; | 3129 | unsigned long pgoff; |
2769 | struct page *page; | 3130 | struct page *page; |
2770 | 3131 | ||
2771 | *type = VM_FAULT_MINOR; | ||
2772 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 3132 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
2773 | page = gfn_to_page(kvm, pgoff); | 3133 | page = gfn_to_page(kvm, pgoff); |
2774 | if (!page) | 3134 | if (!page) |
2775 | return NOPAGE_SIGBUS; | 3135 | return NOPAGE_SIGBUS; |
2776 | get_page(page); | 3136 | get_page(page); |
3137 | if (type != NULL) | ||
3138 | *type = VM_FAULT_MINOR; | ||
3139 | |||
2777 | return page; | 3140 | return page; |
2778 | } | 3141 | } |
2779 | 3142 | ||
@@ -2861,12 +3224,20 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2861 | r = 0; | 3224 | r = 0; |
2862 | break; | 3225 | break; |
2863 | } | 3226 | } |
2864 | case KVM_CHECK_EXTENSION: | 3227 | case KVM_CHECK_EXTENSION: { |
2865 | /* | 3228 | int ext = (long)argp; |
2866 | * No extensions defined at present. | 3229 | |
2867 | */ | 3230 | switch (ext) { |
2868 | r = 0; | 3231 | case KVM_CAP_IRQCHIP: |
3232 | case KVM_CAP_HLT: | ||
3233 | r = 1; | ||
3234 | break; | ||
3235 | default: | ||
3236 | r = 0; | ||
3237 | break; | ||
3238 | } | ||
2869 | break; | 3239 | break; |
3240 | } | ||
2870 | case KVM_GET_VCPU_MMAP_SIZE: | 3241 | case KVM_GET_VCPU_MMAP_SIZE: |
2871 | r = -EINVAL; | 3242 | r = -EINVAL; |
2872 | if (arg) | 3243 | if (arg) |
@@ -2881,8 +3252,6 @@ out: | |||
2881 | } | 3252 | } |
2882 | 3253 | ||
2883 | static struct file_operations kvm_chardev_ops = { | 3254 | static struct file_operations kvm_chardev_ops = { |
2884 | .open = kvm_dev_open, | ||
2885 | .release = kvm_dev_release, | ||
2886 | .unlocked_ioctl = kvm_dev_ioctl, | 3255 | .unlocked_ioctl = kvm_dev_ioctl, |
2887 | .compat_ioctl = kvm_dev_ioctl, | 3256 | .compat_ioctl = kvm_dev_ioctl, |
2888 | }; | 3257 | }; |
@@ -2893,25 +3262,6 @@ static struct miscdevice kvm_dev = { | |||
2893 | &kvm_chardev_ops, | 3262 | &kvm_chardev_ops, |
2894 | }; | 3263 | }; |
2895 | 3264 | ||
2896 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | ||
2897 | void *v) | ||
2898 | { | ||
2899 | if (val == SYS_RESTART) { | ||
2900 | /* | ||
2901 | * Some (well, at least mine) BIOSes hang on reboot if | ||
2902 | * in vmx root mode. | ||
2903 | */ | ||
2904 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | ||
2905 | on_each_cpu(hardware_disable, NULL, 0, 1); | ||
2906 | } | ||
2907 | return NOTIFY_OK; | ||
2908 | } | ||
2909 | |||
2910 | static struct notifier_block kvm_reboot_notifier = { | ||
2911 | .notifier_call = kvm_reboot, | ||
2912 | .priority = 0, | ||
2913 | }; | ||
2914 | |||
2915 | /* | 3265 | /* |
2916 | * Make sure that a cpu that is being hot-unplugged does not have any vcpus | 3266 | * Make sure that a cpu that is being hot-unplugged does not have any vcpus |
2917 | * cached on it. | 3267 | * cached on it. |
@@ -2925,7 +3275,9 @@ static void decache_vcpus_on_cpu(int cpu) | |||
2925 | spin_lock(&kvm_lock); | 3275 | spin_lock(&kvm_lock); |
2926 | list_for_each_entry(vm, &vm_list, vm_list) | 3276 | list_for_each_entry(vm, &vm_list, vm_list) |
2927 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 3277 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
2928 | vcpu = &vm->vcpus[i]; | 3278 | vcpu = vm->vcpus[i]; |
3279 | if (!vcpu) | ||
3280 | continue; | ||
2929 | /* | 3281 | /* |
2930 | * If the vcpu is locked, then it is running on some | 3282 | * If the vcpu is locked, then it is running on some |
2931 | * other cpu and therefore it is not cached on the | 3283 | * other cpu and therefore it is not cached on the |
@@ -2936,7 +3288,7 @@ static void decache_vcpus_on_cpu(int cpu) | |||
2936 | */ | 3288 | */ |
2937 | if (mutex_trylock(&vcpu->mutex)) { | 3289 | if (mutex_trylock(&vcpu->mutex)) { |
2938 | if (vcpu->cpu == cpu) { | 3290 | if (vcpu->cpu == cpu) { |
2939 | kvm_arch_ops->vcpu_decache(vcpu); | 3291 | kvm_x86_ops->vcpu_decache(vcpu); |
2940 | vcpu->cpu = -1; | 3292 | vcpu->cpu = -1; |
2941 | } | 3293 | } |
2942 | mutex_unlock(&vcpu->mutex); | 3294 | mutex_unlock(&vcpu->mutex); |
@@ -2952,7 +3304,7 @@ static void hardware_enable(void *junk) | |||
2952 | if (cpu_isset(cpu, cpus_hardware_enabled)) | 3304 | if (cpu_isset(cpu, cpus_hardware_enabled)) |
2953 | return; | 3305 | return; |
2954 | cpu_set(cpu, cpus_hardware_enabled); | 3306 | cpu_set(cpu, cpus_hardware_enabled); |
2955 | kvm_arch_ops->hardware_enable(NULL); | 3307 | kvm_x86_ops->hardware_enable(NULL); |
2956 | } | 3308 | } |
2957 | 3309 | ||
2958 | static void hardware_disable(void *junk) | 3310 | static void hardware_disable(void *junk) |
@@ -2963,7 +3315,7 @@ static void hardware_disable(void *junk) | |||
2963 | return; | 3315 | return; |
2964 | cpu_clear(cpu, cpus_hardware_enabled); | 3316 | cpu_clear(cpu, cpus_hardware_enabled); |
2965 | decache_vcpus_on_cpu(cpu); | 3317 | decache_vcpus_on_cpu(cpu); |
2966 | kvm_arch_ops->hardware_disable(NULL); | 3318 | kvm_x86_ops->hardware_disable(NULL); |
2967 | } | 3319 | } |
2968 | 3320 | ||
2969 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 3321 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
@@ -2994,6 +3346,25 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
2994 | return NOTIFY_OK; | 3346 | return NOTIFY_OK; |
2995 | } | 3347 | } |
2996 | 3348 | ||
3349 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | ||
3350 | void *v) | ||
3351 | { | ||
3352 | if (val == SYS_RESTART) { | ||
3353 | /* | ||
3354 | * Some (well, at least mine) BIOSes hang on reboot if | ||
3355 | * in vmx root mode. | ||
3356 | */ | ||
3357 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | ||
3358 | on_each_cpu(hardware_disable, NULL, 0, 1); | ||
3359 | } | ||
3360 | return NOTIFY_OK; | ||
3361 | } | ||
3362 | |||
3363 | static struct notifier_block kvm_reboot_notifier = { | ||
3364 | .notifier_call = kvm_reboot, | ||
3365 | .priority = 0, | ||
3366 | }; | ||
3367 | |||
2997 | void kvm_io_bus_init(struct kvm_io_bus *bus) | 3368 | void kvm_io_bus_init(struct kvm_io_bus *bus) |
2998 | { | 3369 | { |
2999 | memset(bus, 0, sizeof(*bus)); | 3370 | memset(bus, 0, sizeof(*bus)); |
@@ -3047,18 +3418,15 @@ static u64 stat_get(void *_offset) | |||
3047 | spin_lock(&kvm_lock); | 3418 | spin_lock(&kvm_lock); |
3048 | list_for_each_entry(kvm, &vm_list, vm_list) | 3419 | list_for_each_entry(kvm, &vm_list, vm_list) |
3049 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 3420 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
3050 | vcpu = &kvm->vcpus[i]; | 3421 | vcpu = kvm->vcpus[i]; |
3051 | total += *(u32 *)((void *)vcpu + offset); | 3422 | if (vcpu) |
3423 | total += *(u32 *)((void *)vcpu + offset); | ||
3052 | } | 3424 | } |
3053 | spin_unlock(&kvm_lock); | 3425 | spin_unlock(&kvm_lock); |
3054 | return total; | 3426 | return total; |
3055 | } | 3427 | } |
3056 | 3428 | ||
3057 | static void stat_set(void *offset, u64 val) | 3429 | DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n"); |
3058 | { | ||
3059 | } | ||
3060 | |||
3061 | DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); | ||
3062 | 3430 | ||
3063 | static __init void kvm_init_debug(void) | 3431 | static __init void kvm_init_debug(void) |
3064 | { | 3432 | { |
@@ -3105,11 +3473,34 @@ static struct sys_device kvm_sysdev = { | |||
3105 | 3473 | ||
3106 | hpa_t bad_page_address; | 3474 | hpa_t bad_page_address; |
3107 | 3475 | ||
3108 | int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | 3476 | static inline |
3477 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | ||
3478 | { | ||
3479 | return container_of(pn, struct kvm_vcpu, preempt_notifier); | ||
3480 | } | ||
3481 | |||
3482 | static void kvm_sched_in(struct preempt_notifier *pn, int cpu) | ||
3483 | { | ||
3484 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | ||
3485 | |||
3486 | kvm_x86_ops->vcpu_load(vcpu, cpu); | ||
3487 | } | ||
3488 | |||
3489 | static void kvm_sched_out(struct preempt_notifier *pn, | ||
3490 | struct task_struct *next) | ||
3491 | { | ||
3492 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | ||
3493 | |||
3494 | kvm_x86_ops->vcpu_put(vcpu); | ||
3495 | } | ||
3496 | |||
3497 | int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size, | ||
3498 | struct module *module) | ||
3109 | { | 3499 | { |
3110 | int r; | 3500 | int r; |
3501 | int cpu; | ||
3111 | 3502 | ||
3112 | if (kvm_arch_ops) { | 3503 | if (kvm_x86_ops) { |
3113 | printk(KERN_ERR "kvm: already loaded the other module\n"); | 3504 | printk(KERN_ERR "kvm: already loaded the other module\n"); |
3114 | return -EEXIST; | 3505 | return -EEXIST; |
3115 | } | 3506 | } |
@@ -3123,12 +3514,20 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | |||
3123 | return -EOPNOTSUPP; | 3514 | return -EOPNOTSUPP; |
3124 | } | 3515 | } |
3125 | 3516 | ||
3126 | kvm_arch_ops = ops; | 3517 | kvm_x86_ops = ops; |
3127 | 3518 | ||
3128 | r = kvm_arch_ops->hardware_setup(); | 3519 | r = kvm_x86_ops->hardware_setup(); |
3129 | if (r < 0) | 3520 | if (r < 0) |
3130 | goto out; | 3521 | goto out; |
3131 | 3522 | ||
3523 | for_each_online_cpu(cpu) { | ||
3524 | smp_call_function_single(cpu, | ||
3525 | kvm_x86_ops->check_processor_compatibility, | ||
3526 | &r, 0, 1); | ||
3527 | if (r < 0) | ||
3528 | goto out_free_0; | ||
3529 | } | ||
3530 | |||
3132 | on_each_cpu(hardware_enable, NULL, 0, 1); | 3531 | on_each_cpu(hardware_enable, NULL, 0, 1); |
3133 | r = register_cpu_notifier(&kvm_cpu_notifier); | 3532 | r = register_cpu_notifier(&kvm_cpu_notifier); |
3134 | if (r) | 3533 | if (r) |
@@ -3143,6 +3542,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | |||
3143 | if (r) | 3542 | if (r) |
3144 | goto out_free_3; | 3543 | goto out_free_3; |
3145 | 3544 | ||
3545 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ | ||
3546 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, | ||
3547 | __alignof__(struct kvm_vcpu), 0, 0); | ||
3548 | if (!kvm_vcpu_cache) { | ||
3549 | r = -ENOMEM; | ||
3550 | goto out_free_4; | ||
3551 | } | ||
3552 | |||
3146 | kvm_chardev_ops.owner = module; | 3553 | kvm_chardev_ops.owner = module; |
3147 | 3554 | ||
3148 | r = misc_register(&kvm_dev); | 3555 | r = misc_register(&kvm_dev); |
@@ -3151,9 +3558,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | |||
3151 | goto out_free; | 3558 | goto out_free; |
3152 | } | 3559 | } |
3153 | 3560 | ||
3561 | kvm_preempt_ops.sched_in = kvm_sched_in; | ||
3562 | kvm_preempt_ops.sched_out = kvm_sched_out; | ||
3563 | |||
3154 | return r; | 3564 | return r; |
3155 | 3565 | ||
3156 | out_free: | 3566 | out_free: |
3567 | kmem_cache_destroy(kvm_vcpu_cache); | ||
3568 | out_free_4: | ||
3157 | sysdev_unregister(&kvm_sysdev); | 3569 | sysdev_unregister(&kvm_sysdev); |
3158 | out_free_3: | 3570 | out_free_3: |
3159 | sysdev_class_unregister(&kvm_sysdev_class); | 3571 | sysdev_class_unregister(&kvm_sysdev_class); |
@@ -3162,22 +3574,24 @@ out_free_2: | |||
3162 | unregister_cpu_notifier(&kvm_cpu_notifier); | 3574 | unregister_cpu_notifier(&kvm_cpu_notifier); |
3163 | out_free_1: | 3575 | out_free_1: |
3164 | on_each_cpu(hardware_disable, NULL, 0, 1); | 3576 | on_each_cpu(hardware_disable, NULL, 0, 1); |
3165 | kvm_arch_ops->hardware_unsetup(); | 3577 | out_free_0: |
3578 | kvm_x86_ops->hardware_unsetup(); | ||
3166 | out: | 3579 | out: |
3167 | kvm_arch_ops = NULL; | 3580 | kvm_x86_ops = NULL; |
3168 | return r; | 3581 | return r; |
3169 | } | 3582 | } |
3170 | 3583 | ||
3171 | void kvm_exit_arch(void) | 3584 | void kvm_exit_x86(void) |
3172 | { | 3585 | { |
3173 | misc_deregister(&kvm_dev); | 3586 | misc_deregister(&kvm_dev); |
3587 | kmem_cache_destroy(kvm_vcpu_cache); | ||
3174 | sysdev_unregister(&kvm_sysdev); | 3588 | sysdev_unregister(&kvm_sysdev); |
3175 | sysdev_class_unregister(&kvm_sysdev_class); | 3589 | sysdev_class_unregister(&kvm_sysdev_class); |
3176 | unregister_reboot_notifier(&kvm_reboot_notifier); | 3590 | unregister_reboot_notifier(&kvm_reboot_notifier); |
3177 | unregister_cpu_notifier(&kvm_cpu_notifier); | 3591 | unregister_cpu_notifier(&kvm_cpu_notifier); |
3178 | on_each_cpu(hardware_disable, NULL, 0, 1); | 3592 | on_each_cpu(hardware_disable, NULL, 0, 1); |
3179 | kvm_arch_ops->hardware_unsetup(); | 3593 | kvm_x86_ops->hardware_unsetup(); |
3180 | kvm_arch_ops = NULL; | 3594 | kvm_x86_ops = NULL; |
3181 | } | 3595 | } |
3182 | 3596 | ||
3183 | static __init int kvm_init(void) | 3597 | static __init int kvm_init(void) |
@@ -3220,5 +3634,5 @@ static __exit void kvm_exit(void) | |||
3220 | module_init(kvm_init) | 3634 | module_init(kvm_init) |
3221 | module_exit(kvm_exit) | 3635 | module_exit(kvm_exit) |
3222 | 3636 | ||
3223 | EXPORT_SYMBOL_GPL(kvm_init_arch); | 3637 | EXPORT_SYMBOL_GPL(kvm_init_x86); |
3224 | EXPORT_SYMBOL_GPL(kvm_exit_arch); | 3638 | EXPORT_SYMBOL_GPL(kvm_exit_x86); |
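
The KVM_CHECK_EXTENSION change above lets userspace probe for the new in-kernel irqchip and HLT handling before relying on them. A minimal userspace sketch of such a probe (not part of the patch; it assumes a <linux/kvm.h> that defines KVM_CAP_IRQCHIP and KVM_CAP_HLT) could look like:

/* Probe /dev/kvm for the two capabilities reported by this patch. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	/* A positive return value means the extension is available. */
	printf("KVM_CAP_IRQCHIP: %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP));
	printf("KVM_CAP_HLT:     %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_HLT));
	return 0;
}
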
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index a869983d683d..a0e415daef5b 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h | |||
@@ -20,7 +20,10 @@ static const u32 host_save_user_msrs[] = { | |||
20 | #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) | 20 | #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) |
21 | #define NUM_DB_REGS 4 | 21 | #define NUM_DB_REGS 4 |
22 | 22 | ||
23 | struct kvm_vcpu; | ||
24 | |||
23 | struct vcpu_svm { | 25 | struct vcpu_svm { |
26 | struct kvm_vcpu vcpu; | ||
24 | struct vmcb *vmcb; | 27 | struct vmcb *vmcb; |
25 | unsigned long vmcb_pa; | 28 | unsigned long vmcb_pa; |
26 | struct svm_cpu_data *svm_data; | 29 | struct svm_cpu_data *svm_data; |
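
The new `struct kvm_vcpu vcpu` member above relies on the generic vcpu being embedded inside the arch-specific structure so the two can be converted with container_of(), the same trick the preempt-notifier hooks use earlier in this patch. A standalone sketch of that pattern, with purely illustrative names, is:

/* Embed the generic object in the specific one; recover it with container_of(). */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct generic_vcpu { int id; };

struct arch_vcpu {
	struct generic_vcpu vcpu;	/* embedded generic part */
	unsigned long vmcb_pa;		/* arch-specific state */
};

static struct arch_vcpu *to_arch(struct generic_vcpu *v)
{
	return container_of(v, struct arch_vcpu, vcpu);
}

int main(void)
{
	struct arch_vcpu a = { .vcpu = { .id = 7 }, .vmcb_pa = 0x1000 };
	struct generic_vcpu *g = &a.vcpu;

	printf("id=%d vmcb_pa=%#lx\n", g->id, to_arch(g)->vmcb_pa);
	return 0;
}
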
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c new file mode 100644 index 000000000000..a190587cf6a5 --- /dev/null +++ b/drivers/kvm/lapic.c | |||
@@ -0,0 +1,1064 @@ | |||
1 | |||
2 | /* | ||
3 | * Local APIC virtualization | ||
4 | * | ||
5 | * Copyright (C) 2006 Qumranet, Inc. | ||
6 | * Copyright (C) 2007 Novell | ||
7 | * Copyright (C) 2007 Intel | ||
8 | * | ||
9 | * Authors: | ||
10 | * Dor Laor <dor.laor@qumranet.com> | ||
11 | * Gregory Haskins <ghaskins@novell.com> | ||
12 | * Yaozu (Eddie) Dong <eddie.dong@intel.com> | ||
13 | * | ||
14 | * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. | ||
15 | * | ||
16 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
17 | * the COPYING file in the top-level directory. | ||
18 | */ | ||
19 | |||
20 | #include "kvm.h" | ||
21 | #include <linux/kvm.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/highmem.h> | ||
24 | #include <linux/smp.h> | ||
25 | #include <linux/hrtimer.h> | ||
26 | #include <linux/io.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <asm/processor.h> | ||
29 | #include <asm/msr.h> | ||
30 | #include <asm/page.h> | ||
31 | #include <asm/current.h> | ||
32 | #include <asm/apicdef.h> | ||
33 | #include <asm/atomic.h> | ||
34 | #include <asm/div64.h> | ||
35 | #include "irq.h" | ||
36 | |||
37 | #define PRId64 "d" | ||
38 | #define PRIx64 "llx" | ||
39 | #define PRIu64 "u" | ||
40 | #define PRIo64 "o" | ||
41 | |||
42 | #define APIC_BUS_CYCLE_NS 1 | ||
43 | |||
44 | /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ | ||
45 | #define apic_debug(fmt, arg...) | ||
46 | |||
47 | #define APIC_LVT_NUM 6 | ||
48 | /* 14 is the version for Xeon and Pentium (SDM section 8.4.8) */ | ||
49 | #define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) | ||
50 | #define LAPIC_MMIO_LENGTH (1 << 12) | ||
51 | /* the following defines are not in apicdef.h */ | ||
52 | #define APIC_SHORT_MASK 0xc0000 | ||
53 | #define APIC_DEST_NOSHORT 0x0 | ||
54 | #define APIC_DEST_MASK 0x800 | ||
55 | #define MAX_APIC_VECTOR 256 | ||
56 | |||
57 | #define VEC_POS(v) ((v) & (32 - 1)) | ||
58 | #define REG_POS(v) (((v) >> 5) << 4) | ||
59 | static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) | ||
60 | { | ||
61 | return *((u32 *) (apic->regs + reg_off)); | ||
62 | } | ||
63 | |||
64 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | ||
65 | { | ||
66 | *((u32 *) (apic->regs + reg_off)) = val; | ||
67 | } | ||
68 | |||
69 | static inline int apic_test_and_set_vector(int vec, void *bitmap) | ||
70 | { | ||
71 | return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
72 | } | ||
73 | |||
74 | static inline int apic_test_and_clear_vector(int vec, void *bitmap) | ||
75 | { | ||
76 | return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
77 | } | ||
78 | |||
79 | static inline void apic_set_vector(int vec, void *bitmap) | ||
80 | { | ||
81 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
82 | } | ||
83 | |||
84 | static inline void apic_clear_vector(int vec, void *bitmap) | ||
85 | { | ||
86 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
87 | } | ||
88 | |||
89 | static inline int apic_hw_enabled(struct kvm_lapic *apic) | ||
90 | { | ||
91 | return (apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE; | ||
92 | } | ||
93 | |||
94 | static inline int apic_sw_enabled(struct kvm_lapic *apic) | ||
95 | { | ||
96 | return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; | ||
97 | } | ||
98 | |||
99 | static inline int apic_enabled(struct kvm_lapic *apic) | ||
100 | { | ||
101 | return apic_sw_enabled(apic) && apic_hw_enabled(apic); | ||
102 | } | ||
103 | |||
104 | #define LVT_MASK \ | ||
105 | (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) | ||
106 | |||
107 | #define LINT_MASK \ | ||
108 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ | ||
109 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) | ||
110 | |||
111 | static inline int kvm_apic_id(struct kvm_lapic *apic) | ||
112 | { | ||
113 | return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | ||
114 | } | ||
115 | |||
116 | static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) | ||
117 | { | ||
118 | return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); | ||
119 | } | ||
120 | |||
121 | static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) | ||
122 | { | ||
123 | return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; | ||
124 | } | ||
125 | |||
126 | static inline int apic_lvtt_period(struct kvm_lapic *apic) | ||
127 | { | ||
128 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; | ||
129 | } | ||
130 | |||
131 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | ||
132 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ | ||
133 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ | ||
134 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ | ||
135 | LINT_MASK, LINT_MASK, /* LVT0-1 */ | ||
136 | LVT_MASK /* LVTERR */ | ||
137 | }; | ||
138 | |||
139 | static int find_highest_vector(void *bitmap) | ||
140 | { | ||
141 | u32 *word = bitmap; | ||
142 | int word_offset = MAX_APIC_VECTOR >> 5; | ||
143 | |||
144 | while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) | ||
145 | continue; | ||
146 | |||
147 | if (likely(!word_offset && !word[0])) | ||
148 | return -1; | ||
149 | else | ||
150 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); | ||
151 | } | ||
152 | |||
153 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | ||
154 | { | ||
155 | return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); | ||
156 | } | ||
157 | |||
158 | static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | ||
159 | { | ||
160 | apic_clear_vector(vec, apic->regs + APIC_IRR); | ||
161 | } | ||
162 | |||
163 | static inline int apic_find_highest_irr(struct kvm_lapic *apic) | ||
164 | { | ||
165 | int result; | ||
166 | |||
167 | result = find_highest_vector(apic->regs + APIC_IRR); | ||
168 | ASSERT(result == -1 || result >= 16); | ||
169 | |||
170 | return result; | ||
171 | } | ||
172 | |||
173 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | ||
174 | { | ||
175 | struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; | ||
176 | int highest_irr; | ||
177 | |||
178 | if (!apic) | ||
179 | return 0; | ||
180 | highest_irr = apic_find_highest_irr(apic); | ||
181 | |||
182 | return highest_irr; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); | ||
185 | |||
186 | int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig) | ||
187 | { | ||
188 | if (!apic_test_and_set_irr(vec, apic)) { | ||
189 | /* a new pending irq is set in IRR */ | ||
190 | if (trig) | ||
191 | apic_set_vector(vec, apic->regs + APIC_TMR); | ||
192 | else | ||
193 | apic_clear_vector(vec, apic->regs + APIC_TMR); | ||
194 | kvm_vcpu_kick(apic->vcpu); | ||
195 | return 1; | ||
196 | } | ||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | ||
201 | { | ||
202 | int result; | ||
203 | |||
204 | result = find_highest_vector(apic->regs + APIC_ISR); | ||
205 | ASSERT(result == -1 || result >= 16); | ||
206 | |||
207 | return result; | ||
208 | } | ||
209 | |||
210 | static void apic_update_ppr(struct kvm_lapic *apic) | ||
211 | { | ||
212 | u32 tpr, isrv, ppr; | ||
213 | int isr; | ||
214 | |||
215 | tpr = apic_get_reg(apic, APIC_TASKPRI); | ||
216 | isr = apic_find_highest_isr(apic); | ||
217 | isrv = (isr != -1) ? isr : 0; | ||
218 | |||
219 | if ((tpr & 0xf0) >= (isrv & 0xf0)) | ||
220 | ppr = tpr & 0xff; | ||
221 | else | ||
222 | ppr = isrv & 0xf0; | ||
223 | |||
224 | apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", | ||
225 | apic, ppr, isr, isrv); | ||
226 | |||
227 | apic_set_reg(apic, APIC_PROCPRI, ppr); | ||
228 | } | ||
229 | |||
230 | static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | ||
231 | { | ||
232 | apic_set_reg(apic, APIC_TASKPRI, tpr); | ||
233 | apic_update_ppr(apic); | ||
234 | } | ||
235 | |||
236 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) | ||
237 | { | ||
238 | return kvm_apic_id(apic) == dest; | ||
239 | } | ||
240 | |||
241 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | ||
242 | { | ||
243 | int result = 0; | ||
244 | u8 logical_id; | ||
245 | |||
246 | logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); | ||
247 | |||
248 | switch (apic_get_reg(apic, APIC_DFR)) { | ||
249 | case APIC_DFR_FLAT: | ||
250 | if (logical_id & mda) | ||
251 | result = 1; | ||
252 | break; | ||
253 | case APIC_DFR_CLUSTER: | ||
254 | if (((logical_id >> 4) == (mda >> 0x4)) | ||
255 | && (logical_id & mda & 0xf)) | ||
256 | result = 1; | ||
257 | break; | ||
258 | default: | ||
259 | printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n", | ||
260 | apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); | ||
261 | break; | ||
262 | } | ||
263 | |||
264 | return result; | ||
265 | } | ||
266 | |||
267 | static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | ||
268 | int short_hand, int dest, int dest_mode) | ||
269 | { | ||
270 | int result = 0; | ||
271 | struct kvm_lapic *target = vcpu->apic; | ||
272 | |||
273 | apic_debug("target %p, source %p, dest 0x%x, " | ||
274 | "dest_mode 0x%x, short_hand 0x%x", | ||
275 | target, source, dest, dest_mode, short_hand); | ||
276 | |||
277 | ASSERT(!target); | ||
278 | switch (short_hand) { | ||
279 | case APIC_DEST_NOSHORT: | ||
280 | if (dest_mode == 0) { | ||
281 | /* Physical mode. */ | ||
282 | if ((dest == 0xFF) || (dest == kvm_apic_id(target))) | ||
283 | result = 1; | ||
284 | } else | ||
285 | /* Logical mode. */ | ||
286 | result = kvm_apic_match_logical_addr(target, dest); | ||
287 | break; | ||
288 | case APIC_DEST_SELF: | ||
289 | if (target == source) | ||
290 | result = 1; | ||
291 | break; | ||
292 | case APIC_DEST_ALLINC: | ||
293 | result = 1; | ||
294 | break; | ||
295 | case APIC_DEST_ALLBUT: | ||
296 | if (target != source) | ||
297 | result = 1; | ||
298 | break; | ||
299 | default: | ||
300 | printk(KERN_WARNING "Bad dest shorthand value %x\n", | ||
301 | short_hand); | ||
302 | break; | ||
303 | } | ||
304 | |||
305 | return result; | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * Add a pending IRQ into lapic. | ||
310 | * Return 1 if successfully added and 0 if discarded. | ||
311 | */ | ||
312 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | ||
313 | int vector, int level, int trig_mode) | ||
314 | { | ||
315 | int orig_irr, result = 0; | ||
316 | struct kvm_vcpu *vcpu = apic->vcpu; | ||
317 | |||
318 | switch (delivery_mode) { | ||
319 | case APIC_DM_FIXED: | ||
320 | case APIC_DM_LOWEST: | ||
321 | /* FIXME add logic for vcpu on reset */ | ||
322 | if (unlikely(!apic_enabled(apic))) | ||
323 | break; | ||
324 | |||
325 | orig_irr = apic_test_and_set_irr(vector, apic); | ||
326 | if (orig_irr && trig_mode) { | ||
327 | apic_debug("level trig mode repeatedly for vector %d", | ||
328 | vector); | ||
329 | break; | ||
330 | } | ||
331 | |||
332 | if (trig_mode) { | ||
333 | apic_debug("level trig mode for vector %d", vector); | ||
334 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
335 | } else | ||
336 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
337 | |||
338 | if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE) | ||
339 | kvm_vcpu_kick(vcpu); | ||
340 | else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) { | ||
341 | vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; | ||
342 | if (waitqueue_active(&vcpu->wq)) | ||
343 | wake_up_interruptible(&vcpu->wq); | ||
344 | } | ||
345 | |||
346 | result = (orig_irr == 0); | ||
347 | break; | ||
348 | |||
349 | case APIC_DM_REMRD: | ||
350 | printk(KERN_DEBUG "Ignoring delivery mode 3\n"); | ||
351 | break; | ||
352 | |||
353 | case APIC_DM_SMI: | ||
354 | printk(KERN_DEBUG "Ignoring guest SMI\n"); | ||
355 | break; | ||
356 | case APIC_DM_NMI: | ||
357 | printk(KERN_DEBUG "Ignoring guest NMI\n"); | ||
358 | break; | ||
359 | |||
360 | case APIC_DM_INIT: | ||
361 | if (level) { | ||
362 | if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE) | ||
363 | printk(KERN_DEBUG | ||
364 | "INIT on a runnable vcpu %d\n", | ||
365 | vcpu->vcpu_id); | ||
366 | vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED; | ||
367 | kvm_vcpu_kick(vcpu); | ||
368 | } else { | ||
369 | printk(KERN_DEBUG | ||
370 | "Ignoring de-assert INIT to vcpu %d\n", | ||
371 | vcpu->vcpu_id); | ||
372 | } | ||
373 | |||
374 | break; | ||
375 | |||
376 | case APIC_DM_STARTUP: | ||
377 | printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", | ||
378 | vcpu->vcpu_id, vector); | ||
379 | if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) { | ||
380 | vcpu->sipi_vector = vector; | ||
381 | vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED; | ||
382 | if (waitqueue_active(&vcpu->wq)) | ||
383 | wake_up_interruptible(&vcpu->wq); | ||
384 | } | ||
385 | break; | ||
386 | |||
387 | default: | ||
388 | printk(KERN_ERR "TODO: unsupported delivery mode %x\n", | ||
389 | delivery_mode); | ||
390 | break; | ||
391 | } | ||
392 | return result; | ||
393 | } | ||
394 | |||
395 | struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, | ||
396 | unsigned long bitmap) | ||
397 | { | ||
398 | int vcpu_id; | ||
399 | int last; | ||
400 | int next; | ||
401 | struct kvm_lapic *apic; | ||
402 | |||
403 | last = kvm->round_robin_prev_vcpu; | ||
404 | next = last; | ||
405 | |||
406 | do { | ||
407 | if (++next == KVM_MAX_VCPUS) | ||
408 | next = 0; | ||
409 | if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap)) | ||
410 | continue; | ||
411 | apic = kvm->vcpus[next]->apic; | ||
412 | if (apic && apic_enabled(apic)) | ||
413 | break; | ||
414 | apic = NULL; | ||
415 | } while (next != last); | ||
416 | kvm->round_robin_prev_vcpu = next; | ||
417 | |||
418 | if (!apic) { | ||
419 | vcpu_id = ffs(bitmap) - 1; | ||
420 | if (vcpu_id < 0) { | ||
421 | vcpu_id = 0; | ||
422 | printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); | ||
423 | } | ||
424 | apic = kvm->vcpus[vcpu_id]->apic; | ||
425 | } | ||
426 | |||
427 | return apic; | ||
428 | } | ||
429 | |||
430 | static void apic_set_eoi(struct kvm_lapic *apic) | ||
431 | { | ||
432 | int vector = apic_find_highest_isr(apic); | ||
433 | |||
434 | /* | ||
435 | * Not every EOI write has a corresponding ISR bit set; | ||
436 | * one example is when the kernel checks the timer in setup_IO_APIC. | ||
437 | */ | ||
438 | if (vector == -1) | ||
439 | return; | ||
440 | |||
441 | apic_clear_vector(vector, apic->regs + APIC_ISR); | ||
442 | apic_update_ppr(apic); | ||
443 | |||
444 | if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) | ||
445 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector); | ||
446 | } | ||
447 | |||
448 | static void apic_send_ipi(struct kvm_lapic *apic) | ||
449 | { | ||
450 | u32 icr_low = apic_get_reg(apic, APIC_ICR); | ||
451 | u32 icr_high = apic_get_reg(apic, APIC_ICR2); | ||
452 | |||
453 | unsigned int dest = GET_APIC_DEST_FIELD(icr_high); | ||
454 | unsigned int short_hand = icr_low & APIC_SHORT_MASK; | ||
455 | unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; | ||
456 | unsigned int level = icr_low & APIC_INT_ASSERT; | ||
457 | unsigned int dest_mode = icr_low & APIC_DEST_MASK; | ||
458 | unsigned int delivery_mode = icr_low & APIC_MODE_MASK; | ||
459 | unsigned int vector = icr_low & APIC_VECTOR_MASK; | ||
460 | |||
461 | struct kvm_lapic *target; | ||
462 | struct kvm_vcpu *vcpu; | ||
463 | unsigned long lpr_map = 0; | ||
464 | int i; | ||
465 | |||
466 | apic_debug("icr_high 0x%x, icr_low 0x%x, " | ||
467 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " | ||
468 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", | ||
469 | icr_high, icr_low, short_hand, dest, | ||
470 | trig_mode, level, dest_mode, delivery_mode, vector); | ||
471 | |||
472 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | ||
473 | vcpu = apic->vcpu->kvm->vcpus[i]; | ||
474 | if (!vcpu) | ||
475 | continue; | ||
476 | |||
477 | if (vcpu->apic && | ||
478 | apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { | ||
479 | if (delivery_mode == APIC_DM_LOWEST) | ||
480 | set_bit(vcpu->vcpu_id, &lpr_map); | ||
481 | else | ||
482 | __apic_accept_irq(vcpu->apic, delivery_mode, | ||
483 | vector, level, trig_mode); | ||
484 | } | ||
485 | } | ||
486 | |||
487 | if (delivery_mode == APIC_DM_LOWEST) { | ||
488 | target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map); | ||
489 | if (target != NULL) | ||
490 | __apic_accept_irq(target, delivery_mode, | ||
491 | vector, level, trig_mode); | ||
492 | } | ||
493 | } | ||
494 | |||
495 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | ||
496 | { | ||
497 | u32 counter_passed; | ||
498 | ktime_t passed, now = apic->timer.dev.base->get_time(); | ||
499 | u32 tmcct = apic_get_reg(apic, APIC_TMICT); | ||
500 | |||
501 | ASSERT(apic != NULL); | ||
502 | |||
503 | if (unlikely(ktime_to_ns(now) <= | ||
504 | ktime_to_ns(apic->timer.last_update))) { | ||
505 | /* Wrap around */ | ||
506 | passed = ktime_add(( { | ||
507 | (ktime_t) { | ||
508 | .tv64 = KTIME_MAX - | ||
509 | (apic->timer.last_update).tv64}; } | ||
510 | ), now); | ||
511 | apic_debug("time elapsed\n"); | ||
512 | } else | ||
513 | passed = ktime_sub(now, apic->timer.last_update); | ||
514 | |||
515 | counter_passed = div64_64(ktime_to_ns(passed), | ||
516 | (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); | ||
517 | tmcct -= counter_passed; | ||
518 | |||
519 | if (tmcct <= 0) { | ||
520 | if (unlikely(!apic_lvtt_period(apic))) | ||
521 | tmcct = 0; | ||
522 | else | ||
523 | do { | ||
524 | tmcct += apic_get_reg(apic, APIC_TMICT); | ||
525 | } while (tmcct <= 0); | ||
526 | } | ||
527 | |||
528 | return tmcct; | ||
529 | } | ||
530 | |||
531 | static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) | ||
532 | { | ||
533 | u32 val = 0; | ||
534 | |||
535 | if (offset >= LAPIC_MMIO_LENGTH) | ||
536 | return 0; | ||
537 | |||
538 | switch (offset) { | ||
539 | case APIC_ARBPRI: | ||
540 | printk(KERN_WARNING "Access APIC ARBPRI register " | ||
541 | "which is for P6\n"); | ||
542 | break; | ||
543 | |||
544 | case APIC_TMCCT: /* Timer CCR */ | ||
545 | val = apic_get_tmcct(apic); | ||
546 | break; | ||
547 | |||
548 | default: | ||
549 | apic_update_ppr(apic); | ||
550 | val = apic_get_reg(apic, offset); | ||
551 | break; | ||
552 | } | ||
553 | |||
554 | return val; | ||
555 | } | ||
556 | |||
557 | static void apic_mmio_read(struct kvm_io_device *this, | ||
558 | gpa_t address, int len, void *data) | ||
559 | { | ||
560 | struct kvm_lapic *apic = (struct kvm_lapic *)this->private; | ||
561 | unsigned int offset = address - apic->base_address; | ||
562 | unsigned char alignment = offset & 0xf; | ||
563 | u32 result; | ||
564 | |||
565 | if ((alignment + len) > 4) { | ||
566 | printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d", | ||
567 | (unsigned long)address, len); | ||
568 | return; | ||
569 | } | ||
570 | result = __apic_read(apic, offset & ~0xf); | ||
571 | |||
572 | switch (len) { | ||
573 | case 1: | ||
574 | case 2: | ||
575 | case 4: | ||
576 | memcpy(data, (char *)&result + alignment, len); | ||
577 | break; | ||
578 | default: | ||
579 | printk(KERN_ERR "Local APIC read with len = %x, " | ||
580 | "should be 1,2, or 4 instead\n", len); | ||
581 | break; | ||
582 | } | ||
583 | } | ||
584 | |||
585 | static void update_divide_count(struct kvm_lapic *apic) | ||
586 | { | ||
587 | u32 tmp1, tmp2, tdcr; | ||
588 | |||
589 | tdcr = apic_get_reg(apic, APIC_TDCR); | ||
590 | tmp1 = tdcr & 0xf; | ||
591 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; | ||
592 | apic->timer.divide_count = 0x1 << (tmp2 & 0x7); | ||
593 | |||
594 | apic_debug("timer divide count is 0x%x\n", | ||
595 | apic->timer.divide_count); | ||
596 | } | ||
597 | |||
598 | static void start_apic_timer(struct kvm_lapic *apic) | ||
599 | { | ||
600 | ktime_t now = apic->timer.dev.base->get_time(); | ||
601 | |||
602 | apic->timer.last_update = now; | ||
603 | |||
604 | apic->timer.period = apic_get_reg(apic, APIC_TMICT) * | ||
605 | APIC_BUS_CYCLE_NS * apic->timer.divide_count; | ||
606 | atomic_set(&apic->timer.pending, 0); | ||
607 | hrtimer_start(&apic->timer.dev, | ||
608 | ktime_add_ns(now, apic->timer.period), | ||
609 | HRTIMER_MODE_ABS); | ||
610 | |||
611 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" | ||
612 | PRIx64 ", " | ||
613 | "timer initial count 0x%x, period %lldns, " | ||
614 | "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, | ||
615 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), | ||
616 | apic_get_reg(apic, APIC_TMICT), | ||
617 | apic->timer.period, | ||
618 | ktime_to_ns(ktime_add_ns(now, | ||
619 | apic->timer.period))); | ||
620 | } | ||
621 | |||
622 | static void apic_mmio_write(struct kvm_io_device *this, | ||
623 | gpa_t address, int len, const void *data) | ||
624 | { | ||
625 | struct kvm_lapic *apic = (struct kvm_lapic *)this->private; | ||
626 | unsigned int offset = address - apic->base_address; | ||
627 | unsigned char alignment = offset & 0xf; | ||
628 | u32 val; | ||
629 | |||
630 | /* | ||
631 | * APIC registers must be aligned on a 128-bit boundary. | ||
632 | * 32/64/128-bit registers must be accessed as 32-bit values. | ||
633 | * Refer to SDM 8.4.1. | ||
634 | */ | ||
635 | if (len != 4 || alignment) { | ||
636 | if (printk_ratelimit()) | ||
637 | printk(KERN_ERR "apic write: bad size=%d %lx\n", | ||
638 | len, (long)address); | ||
639 | return; | ||
640 | } | ||
641 | |||
642 | val = *(u32 *) data; | ||
643 | |||
644 | /* EOI writes are too common to log */ | ||
645 | if (offset != APIC_EOI) | ||
646 | apic_debug("%s: offset 0x%x with length 0x%x, and value is " | ||
647 | "0x%x\n", __FUNCTION__, offset, len, val); | ||
648 | |||
649 | offset &= 0xff0; | ||
650 | |||
651 | switch (offset) { | ||
652 | case APIC_ID: /* Local APIC ID */ | ||
653 | apic_set_reg(apic, APIC_ID, val); | ||
654 | break; | ||
655 | |||
656 | case APIC_TASKPRI: | ||
657 | apic_set_tpr(apic, val & 0xff); | ||
658 | break; | ||
659 | |||
660 | case APIC_EOI: | ||
661 | apic_set_eoi(apic); | ||
662 | break; | ||
663 | |||
664 | case APIC_LDR: | ||
665 | apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); | ||
666 | break; | ||
667 | |||
668 | case APIC_DFR: | ||
669 | apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); | ||
670 | break; | ||
671 | |||
672 | case APIC_SPIV: | ||
673 | apic_set_reg(apic, APIC_SPIV, val & 0x3ff); | ||
674 | if (!(val & APIC_SPIV_APIC_ENABLED)) { | ||
675 | int i; | ||
676 | u32 lvt_val; | ||
677 | |||
678 | for (i = 0; i < APIC_LVT_NUM; i++) { | ||
679 | lvt_val = apic_get_reg(apic, | ||
680 | APIC_LVTT + 0x10 * i); | ||
681 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, | ||
682 | lvt_val | APIC_LVT_MASKED); | ||
683 | } | ||
684 | atomic_set(&apic->timer.pending, 0); | ||
685 | |||
686 | } | ||
687 | break; | ||
688 | |||
689 | case APIC_ICR: | ||
690 | /* No delay here, so we always clear the pending bit */ | ||
691 | apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); | ||
692 | apic_send_ipi(apic); | ||
693 | break; | ||
694 | |||
695 | case APIC_ICR2: | ||
696 | apic_set_reg(apic, APIC_ICR2, val & 0xff000000); | ||
697 | break; | ||
698 | |||
699 | case APIC_LVTT: | ||
700 | case APIC_LVTTHMR: | ||
701 | case APIC_LVTPC: | ||
702 | case APIC_LVT0: | ||
703 | case APIC_LVT1: | ||
704 | case APIC_LVTERR: | ||
705 | /* TODO: Check vector */ | ||
706 | if (!apic_sw_enabled(apic)) | ||
707 | val |= APIC_LVT_MASKED; | ||
708 | |||
709 | val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4]; | ||
710 | apic_set_reg(apic, offset, val); | ||
711 | |||
712 | break; | ||
713 | |||
714 | case APIC_TMICT: | ||
715 | hrtimer_cancel(&apic->timer.dev); | ||
716 | apic_set_reg(apic, APIC_TMICT, val); | ||
717 | start_apic_timer(apic); | ||
718 | return; | ||
719 | |||
720 | case APIC_TDCR: | ||
721 | if (val & 4) | ||
722 | printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val); | ||
723 | apic_set_reg(apic, APIC_TDCR, val); | ||
724 | update_divide_count(apic); | ||
725 | break; | ||
726 | |||
727 | default: | ||
728 | apic_debug("Local APIC Write to read-only register %x\n", | ||
729 | offset); | ||
730 | break; | ||
731 | } | ||
732 | |||
733 | } | ||
734 | |||
735 | static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) | ||
736 | { | ||
737 | struct kvm_lapic *apic = (struct kvm_lapic *)this->private; | ||
738 | int ret = 0; | ||
739 | |||
740 | |||
741 | if (apic_hw_enabled(apic) && | ||
742 | (addr >= apic->base_address) && | ||
743 | (addr < (apic->base_address + LAPIC_MMIO_LENGTH))) | ||
744 | ret = 1; | ||
745 | |||
746 | return ret; | ||
747 | } | ||
748 | |||
749 | void kvm_free_apic(struct kvm_lapic *apic) | ||
750 | { | ||
751 | if (!apic) | ||
752 | return; | ||
753 | |||
754 | hrtimer_cancel(&apic->timer.dev); | ||
755 | |||
756 | if (apic->regs_page) { | ||
757 | __free_page(apic->regs_page); | ||
758 | apic->regs_page = 0; | ||
759 | } | ||
760 | |||
761 | kfree(apic); | ||
762 | } | ||
763 | |||
764 | /* | ||
765 | *---------------------------------------------------------------------- | ||
766 | * LAPIC interface | ||
767 | *---------------------------------------------------------------------- | ||
768 | */ | ||
769 | |||
770 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
771 | { | ||
772 | struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; | ||
773 | |||
774 | if (!apic) | ||
775 | return; | ||
776 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4)); | ||
777 | } | ||
778 | |||
779 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | ||
780 | { | ||
781 | struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; | ||
782 | u64 tpr; | ||
783 | |||
784 | if (!apic) | ||
785 | return 0; | ||
786 | tpr = (u64) apic_get_reg(apic, APIC_TASKPRI); | ||
787 | |||
788 | return (tpr & 0xf0) >> 4; | ||
789 | } | ||
790 | EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8); | ||
791 | |||
792 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | ||
793 | { | ||
794 | struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; | ||
795 | |||
796 | if (!apic) { | ||
797 | value |= MSR_IA32_APICBASE_BSP; | ||
798 | vcpu->apic_base = value; | ||
799 | return; | ||
800 | } | ||
801 | if (apic->vcpu->vcpu_id) | ||
802 | value &= ~MSR_IA32_APICBASE_BSP; | ||
803 | |||
804 | vcpu->apic_base = value; | ||
805 | apic->base_address = apic->vcpu->apic_base & | ||
806 | MSR_IA32_APICBASE_BASE; | ||
807 | |||
808 | /* with FSB delivery interrupt, we can restart APIC functionality */ | ||
809 | apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " | ||
810 | "0x%lx.\n", apic->apic_base, apic->base_address); | ||
811 | |||
812 | } | ||
813 | |||
814 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu) | ||
815 | { | ||
816 | return vcpu->apic_base; | ||
817 | } | ||
818 | EXPORT_SYMBOL_GPL(kvm_lapic_get_base); | ||
819 | |||
820 | void kvm_lapic_reset(struct kvm_vcpu *vcpu) | ||
821 | { | ||
822 | struct kvm_lapic *apic; | ||
823 | int i; | ||
824 | |||
825 | apic_debug("%s\n", __FUNCTION__); | ||
826 | |||
827 | ASSERT(vcpu); | ||
828 | apic = vcpu->apic; | ||
829 | ASSERT(apic != NULL); | ||
830 | |||
831 | /* Stop the timer in case it's a reset to an active apic */ | ||
832 | hrtimer_cancel(&apic->timer.dev); | ||
833 | |||
834 | apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); | ||
835 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); | ||
836 | |||
837 | for (i = 0; i < APIC_LVT_NUM; i++) | ||
838 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); | ||
839 | apic_set_reg(apic, APIC_LVT0, | ||
840 | SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); | ||
841 | |||
842 | apic_set_reg(apic, APIC_DFR, 0xffffffffU); | ||
843 | apic_set_reg(apic, APIC_SPIV, 0xff); | ||
844 | apic_set_reg(apic, APIC_TASKPRI, 0); | ||
845 | apic_set_reg(apic, APIC_LDR, 0); | ||
846 | apic_set_reg(apic, APIC_ESR, 0); | ||
847 | apic_set_reg(apic, APIC_ICR, 0); | ||
848 | apic_set_reg(apic, APIC_ICR2, 0); | ||
849 | apic_set_reg(apic, APIC_TDCR, 0); | ||
850 | apic_set_reg(apic, APIC_TMICT, 0); | ||
851 | for (i = 0; i < 8; i++) { | ||
852 | apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); | ||
853 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | ||
854 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | ||
855 | } | ||
856 | apic->timer.divide_count = 0; | ||
857 | atomic_set(&apic->timer.pending, 0); | ||
858 | if (vcpu->vcpu_id == 0) | ||
859 | vcpu->apic_base |= MSR_IA32_APICBASE_BSP; | ||
860 | apic_update_ppr(apic); | ||
861 | |||
862 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" | ||
863 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, | ||
864 | vcpu, kvm_apic_id(apic), | ||
865 | vcpu->apic_base, apic->base_address); | ||
866 | } | ||
867 | EXPORT_SYMBOL_GPL(kvm_lapic_reset); | ||
868 | |||
869 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | ||
870 | { | ||
871 | struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; | ||
872 | int ret = 0; | ||
873 | |||
874 | if (!apic) | ||
875 | return 0; | ||
876 | ret = apic_enabled(apic); | ||
877 | |||
878 | return ret; | ||
879 | } | ||
880 | EXPORT_SYMBOL_GPL(kvm_lapic_enabled); | ||
881 | |||
882 | /* | ||
883 | *---------------------------------------------------------------------- | ||
884 | * timer interface | ||
885 | *---------------------------------------------------------------------- | ||
886 | */ | ||
887 | |||
888 | /* TODO: make sure __apic_timer_fn runs in current pCPU */ | ||
889 | static int __apic_timer_fn(struct kvm_lapic *apic) | ||
890 | { | ||
891 | int result = 0; | ||
892 | wait_queue_head_t *q = &apic->vcpu->wq; | ||
893 | |||
894 | atomic_inc(&apic->timer.pending); | ||
895 | if (waitqueue_active(q)) | ||
896 | { | ||
897 | apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; | ||
898 | wake_up_interruptible(q); | ||
899 | } | ||
900 | if (apic_lvtt_period(apic)) { | ||
901 | result = 1; | ||
902 | apic->timer.dev.expires = ktime_add_ns( | ||
903 | apic->timer.dev.expires, | ||
904 | apic->timer.period); | ||
905 | } | ||
906 | return result; | ||
907 | } | ||
908 | |||
909 | static int __inject_apic_timer_irq(struct kvm_lapic *apic) | ||
910 | { | ||
911 | int vector; | ||
912 | |||
913 | vector = apic_lvt_vector(apic, APIC_LVTT); | ||
914 | return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); | ||
915 | } | ||
916 | |||
917 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | ||
918 | { | ||
919 | struct kvm_lapic *apic; | ||
920 | int restart_timer = 0; | ||
921 | |||
922 | apic = container_of(data, struct kvm_lapic, timer.dev); | ||
923 | |||
924 | restart_timer = __apic_timer_fn(apic); | ||
925 | |||
926 | if (restart_timer) | ||
927 | return HRTIMER_RESTART; | ||
928 | else | ||
929 | return HRTIMER_NORESTART; | ||
930 | } | ||
931 | |||
932 | int kvm_create_lapic(struct kvm_vcpu *vcpu) | ||
933 | { | ||
934 | struct kvm_lapic *apic; | ||
935 | |||
936 | ASSERT(vcpu != NULL); | ||
937 | apic_debug("apic_init %d\n", vcpu->vcpu_id); | ||
938 | |||
939 | apic = kzalloc(sizeof(*apic), GFP_KERNEL); | ||
940 | if (!apic) | ||
941 | goto nomem; | ||
942 | |||
943 | vcpu->apic = apic; | ||
944 | |||
945 | apic->regs_page = alloc_page(GFP_KERNEL); | ||
946 | if (apic->regs_page == NULL) { | ||
947 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | ||
948 | vcpu->vcpu_id); | ||
949 | goto nomem; | ||
950 | } | ||
951 | apic->regs = page_address(apic->regs_page); | ||
952 | memset(apic->regs, 0, PAGE_SIZE); | ||
953 | apic->vcpu = vcpu; | ||
954 | |||
955 | hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
956 | apic->timer.dev.function = apic_timer_fn; | ||
957 | apic->base_address = APIC_DEFAULT_PHYS_BASE; | ||
958 | vcpu->apic_base = APIC_DEFAULT_PHYS_BASE; | ||
959 | |||
960 | kvm_lapic_reset(vcpu); | ||
961 | apic->dev.read = apic_mmio_read; | ||
962 | apic->dev.write = apic_mmio_write; | ||
963 | apic->dev.in_range = apic_mmio_range; | ||
964 | apic->dev.private = apic; | ||
965 | |||
966 | return 0; | ||
967 | nomem: | ||
968 | kvm_free_apic(apic); | ||
969 | return -ENOMEM; | ||
970 | } | ||
971 | EXPORT_SYMBOL_GPL(kvm_create_lapic); | ||
972 | |||
973 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) | ||
974 | { | ||
975 | struct kvm_lapic *apic = vcpu->apic; | ||
976 | int highest_irr; | ||
977 | |||
978 | if (!apic || !apic_enabled(apic)) | ||
979 | return -1; | ||
980 | |||
981 | apic_update_ppr(apic); | ||
982 | highest_irr = apic_find_highest_irr(apic); | ||
983 | if ((highest_irr == -1) || | ||
984 | ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) | ||
985 | return -1; | ||
986 | return highest_irr; | ||
987 | } | ||
988 | |||
989 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) | ||
990 | { | ||
991 | u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0); | ||
992 | int r = 0; | ||
993 | |||
994 | if (vcpu->vcpu_id == 0) { | ||
995 | if (!apic_hw_enabled(vcpu->apic)) | ||
996 | r = 1; | ||
997 | if ((lvt0 & APIC_LVT_MASKED) == 0 && | ||
998 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) | ||
999 | r = 1; | ||
1000 | } | ||
1001 | return r; | ||
1002 | } | ||
1003 | |||
1004 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | ||
1005 | { | ||
1006 | struct kvm_lapic *apic = vcpu->apic; | ||
1007 | |||
1008 | if (apic && apic_lvt_enabled(apic, APIC_LVTT) && | ||
1009 | atomic_read(&apic->timer.pending) > 0) { | ||
1010 | if (__inject_apic_timer_irq(apic)) | ||
1011 | atomic_dec(&apic->timer.pending); | ||
1012 | } | ||
1013 | } | ||
1014 | |||
1015 | void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | ||
1016 | { | ||
1017 | struct kvm_lapic *apic = vcpu->apic; | ||
1018 | |||
1019 | if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) | ||
1020 | apic->timer.last_update = ktime_add_ns( | ||
1021 | apic->timer.last_update, | ||
1022 | apic->timer.period); | ||
1023 | } | ||
1024 | |||
1025 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | ||
1026 | { | ||
1027 | int vector = kvm_apic_has_interrupt(vcpu); | ||
1028 | struct kvm_lapic *apic = vcpu->apic; | ||
1029 | |||
1030 | if (vector == -1) | ||
1031 | return -1; | ||
1032 | |||
1033 | apic_set_vector(vector, apic->regs + APIC_ISR); | ||
1034 | apic_update_ppr(apic); | ||
1035 | apic_clear_irr(vector, apic); | ||
1036 | return vector; | ||
1037 | } | ||
1038 | |||
1039 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | ||
1040 | { | ||
1041 | struct kvm_lapic *apic = vcpu->apic; | ||
1042 | |||
1043 | apic->base_address = vcpu->apic_base & | ||
1044 | MSR_IA32_APICBASE_BASE; | ||
1045 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); | ||
1046 | apic_update_ppr(apic); | ||
1047 | hrtimer_cancel(&apic->timer.dev); | ||
1048 | update_divide_count(apic); | ||
1049 | start_apic_timer(apic); | ||
1050 | } | ||
1051 | |||
1052 | void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | ||
1053 | { | ||
1054 | struct kvm_lapic *apic = vcpu->apic; | ||
1055 | struct hrtimer *timer; | ||
1056 | |||
1057 | if (!apic) | ||
1058 | return; | ||
1059 | |||
1060 | timer = &apic->timer.dev; | ||
1061 | if (hrtimer_cancel(timer)) | ||
1062 | hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); | ||
1063 | } | ||
1064 | EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer); | ||
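
The IRR/ISR/TMR handling above stores 256 vector bits across eight 32-bit registers placed 16 bytes apart, which is why VEC_POS() and REG_POS() are defined the way they are. A small userspace sketch of that layout and of the highest-vector scan (illustrative only; it substitutes __builtin_clz for the kernel's fls()):

/* 256 vector bits, one 32-bit register every 16 bytes, as in the lapic code. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define VEC_POS(v) ((v) & 31)
#define REG_POS(v) (((v) >> 5) << 4)

static uint32_t regs[64];	/* stand-in for the 256-byte APIC register area */

static void set_vector(int vec, void *bitmap)
{
	uint32_t *reg = (uint32_t *)((char *)bitmap + REG_POS(vec));

	*reg |= 1u << VEC_POS(vec);
}

static int find_highest_vector(void *bitmap)
{
	uint32_t *word = bitmap;
	int word_offset = 256 >> 5;

	while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
		continue;
	if (!word_offset && !word[0])
		return -1;
	/* fls(x) - 1 == 31 - clz(x) for nonzero x */
	return 31 - __builtin_clz(word[word_offset << 2]) + (word_offset << 5);
}

int main(void)
{
	memset(regs, 0, sizeof(regs));
	set_vector(0x31, regs);
	set_vector(0xec, regs);
	printf("highest pending vector: 0x%x\n", find_highest_vector(regs));
	return 0;
}
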
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 23965aa5ee78..6d84d30f5ed0 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -158,7 +158,7 @@ static struct kmem_cache *mmu_page_header_cache; | |||
158 | 158 | ||
159 | static int is_write_protection(struct kvm_vcpu *vcpu) | 159 | static int is_write_protection(struct kvm_vcpu *vcpu) |
160 | { | 160 | { |
161 | return vcpu->cr0 & CR0_WP_MASK; | 161 | return vcpu->cr0 & X86_CR0_WP; |
162 | } | 162 | } |
163 | 163 | ||
164 | static int is_cpuid_PSE36(void) | 164 | static int is_cpuid_PSE36(void) |
@@ -202,15 +202,14 @@ static void set_shadow_pte(u64 *sptep, u64 spte) | |||
202 | } | 202 | } |
203 | 203 | ||
204 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 204 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
205 | struct kmem_cache *base_cache, int min, | 205 | struct kmem_cache *base_cache, int min) |
206 | gfp_t gfp_flags) | ||
207 | { | 206 | { |
208 | void *obj; | 207 | void *obj; |
209 | 208 | ||
210 | if (cache->nobjs >= min) | 209 | if (cache->nobjs >= min) |
211 | return 0; | 210 | return 0; |
212 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 211 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
213 | obj = kmem_cache_zalloc(base_cache, gfp_flags); | 212 | obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); |
214 | if (!obj) | 213 | if (!obj) |
215 | return -ENOMEM; | 214 | return -ENOMEM; |
216 | cache->objects[cache->nobjs++] = obj; | 215 | cache->objects[cache->nobjs++] = obj; |
@@ -225,14 +224,14 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | |||
225 | } | 224 | } |
226 | 225 | ||
227 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 226 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
228 | int min, gfp_t gfp_flags) | 227 | int min) |
229 | { | 228 | { |
230 | struct page *page; | 229 | struct page *page; |
231 | 230 | ||
232 | if (cache->nobjs >= min) | 231 | if (cache->nobjs >= min) |
233 | return 0; | 232 | return 0; |
234 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 233 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
235 | page = alloc_page(gfp_flags); | 234 | page = alloc_page(GFP_KERNEL); |
236 | if (!page) | 235 | if (!page) |
237 | return -ENOMEM; | 236 | return -ENOMEM; |
238 | set_page_private(page, 0); | 237 | set_page_private(page, 0); |
@@ -247,44 +246,28 @@ static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc) | |||
247 | free_page((unsigned long)mc->objects[--mc->nobjs]); | 246 | free_page((unsigned long)mc->objects[--mc->nobjs]); |
248 | } | 247 | } |
249 | 248 | ||
250 | static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) | 249 | static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) |
251 | { | 250 | { |
252 | int r; | 251 | int r; |
253 | 252 | ||
253 | kvm_mmu_free_some_pages(vcpu); | ||
254 | r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, | 254 | r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, |
255 | pte_chain_cache, 4, gfp_flags); | 255 | pte_chain_cache, 4); |
256 | if (r) | 256 | if (r) |
257 | goto out; | 257 | goto out; |
258 | r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, | 258 | r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, |
259 | rmap_desc_cache, 1, gfp_flags); | 259 | rmap_desc_cache, 1); |
260 | if (r) | 260 | if (r) |
261 | goto out; | 261 | goto out; |
262 | r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags); | 262 | r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4); |
263 | if (r) | 263 | if (r) |
264 | goto out; | 264 | goto out; |
265 | r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, | 265 | r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, |
266 | mmu_page_header_cache, 4, gfp_flags); | 266 | mmu_page_header_cache, 4); |
267 | out: | 267 | out: |
268 | return r; | 268 | return r; |
269 | } | 269 | } |
270 | 270 | ||
271 | static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) | ||
272 | { | ||
273 | int r; | ||
274 | |||
275 | r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); | ||
276 | kvm_mmu_free_some_pages(vcpu); | ||
277 | if (r < 0) { | ||
278 | spin_unlock(&vcpu->kvm->lock); | ||
279 | kvm_arch_ops->vcpu_put(vcpu); | ||
280 | r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); | ||
281 | kvm_arch_ops->vcpu_load(vcpu); | ||
282 | spin_lock(&vcpu->kvm->lock); | ||
283 | kvm_mmu_free_some_pages(vcpu); | ||
284 | } | ||
285 | return r; | ||
286 | } | ||
287 | |||
288 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | 271 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
289 | { | 272 | { |
290 | mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); | 273 | mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); |
@@ -969,7 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
969 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 952 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
970 | { | 953 | { |
971 | ++vcpu->stat.tlb_flush; | 954 | ++vcpu->stat.tlb_flush; |
972 | kvm_arch_ops->tlb_flush(vcpu); | 955 | kvm_x86_ops->tlb_flush(vcpu); |
973 | } | 956 | } |
974 | 957 | ||
975 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 958 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -982,7 +965,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, | |||
982 | u64 addr, | 965 | u64 addr, |
983 | u32 err_code) | 966 | u32 err_code) |
984 | { | 967 | { |
985 | kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); | 968 | kvm_x86_ops->inject_page_fault(vcpu, addr, err_code); |
986 | } | 969 | } |
987 | 970 | ||
988 | static void paging_free(struct kvm_vcpu *vcpu) | 971 | static void paging_free(struct kvm_vcpu *vcpu) |
@@ -1071,15 +1054,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
1071 | { | 1054 | { |
1072 | int r; | 1055 | int r; |
1073 | 1056 | ||
1074 | spin_lock(&vcpu->kvm->lock); | 1057 | mutex_lock(&vcpu->kvm->lock); |
1075 | r = mmu_topup_memory_caches(vcpu); | 1058 | r = mmu_topup_memory_caches(vcpu); |
1076 | if (r) | 1059 | if (r) |
1077 | goto out; | 1060 | goto out; |
1078 | mmu_alloc_roots(vcpu); | 1061 | mmu_alloc_roots(vcpu); |
1079 | kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); | 1062 | kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); |
1080 | kvm_mmu_flush_tlb(vcpu); | 1063 | kvm_mmu_flush_tlb(vcpu); |
1081 | out: | 1064 | out: |
1082 | spin_unlock(&vcpu->kvm->lock); | 1065 | mutex_unlock(&vcpu->kvm->lock); |
1083 | return r; | 1066 | return r; |
1084 | } | 1067 | } |
1085 | EXPORT_SYMBOL_GPL(kvm_mmu_load); | 1068 | EXPORT_SYMBOL_GPL(kvm_mmu_load); |
@@ -1124,7 +1107,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
1124 | } | 1107 | } |
1125 | 1108 | ||
1126 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1109 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
1127 | const u8 *old, const u8 *new, int bytes) | 1110 | const u8 *new, int bytes) |
1128 | { | 1111 | { |
1129 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1112 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1130 | struct kvm_mmu_page *page; | 1113 | struct kvm_mmu_page *page; |
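
The simplified mmu_topup_memory_cache() above keeps the underlying pattern: preallocate a handful of objects with GFP_KERNEL before the fault path needs them, then hand them out without allocating under the lock. A rough userspace sketch of that topup/alloc pattern (names and sizes are illustrative, and the refill is simplified to fill only up to the minimum):

/* Fill a small fixed cache up front; later consumers pop without allocating. */
#include <stdlib.h>
#include <stdio.h>

#define CACHE_SIZE 4

struct obj_cache {
	int nobjs;
	void *objects[CACHE_SIZE];
};

static int cache_topup(struct obj_cache *cache, size_t size, int min)
{
	while (cache->nobjs < min) {
		void *obj = calloc(1, size);

		if (!obj)
			return -1;	/* -ENOMEM in the kernel version */
		cache->objects[cache->nobjs++] = obj;
	}
	return 0;
}

static void *cache_alloc(struct obj_cache *cache)
{
	/* Caller guarantees a prior successful topup. */
	return cache->objects[--cache->nobjs];
}

int main(void)
{
	struct obj_cache cache = { 0 };

	if (cache_topup(&cache, 64, 4))
		return 1;
	printf("got %p, %d objects left\n", cache_alloc(&cache), cache.nobjs);
	return 0;
}
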
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 4b5391c717f8..6b094b44f8fb 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h | |||
@@ -58,7 +58,10 @@ struct guest_walker { | |||
58 | int level; | 58 | int level; |
59 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; | 59 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; |
60 | pt_element_t *table; | 60 | pt_element_t *table; |
61 | pt_element_t pte; | ||
61 | pt_element_t *ptep; | 62 | pt_element_t *ptep; |
63 | struct page *page; | ||
64 | int index; | ||
62 | pt_element_t inherited_ar; | 65 | pt_element_t inherited_ar; |
63 | gfn_t gfn; | 66 | gfn_t gfn; |
64 | u32 error_code; | 67 | u32 error_code; |
@@ -80,11 +83,14 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
80 | pgprintk("%s: addr %lx\n", __FUNCTION__, addr); | 83 | pgprintk("%s: addr %lx\n", __FUNCTION__, addr); |
81 | walker->level = vcpu->mmu.root_level; | 84 | walker->level = vcpu->mmu.root_level; |
82 | walker->table = NULL; | 85 | walker->table = NULL; |
86 | walker->page = NULL; | ||
87 | walker->ptep = NULL; | ||
83 | root = vcpu->cr3; | 88 | root = vcpu->cr3; |
84 | #if PTTYPE == 64 | 89 | #if PTTYPE == 64 |
85 | if (!is_long_mode(vcpu)) { | 90 | if (!is_long_mode(vcpu)) { |
86 | walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; | 91 | walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; |
87 | root = *walker->ptep; | 92 | root = *walker->ptep; |
93 | walker->pte = root; | ||
88 | if (!(root & PT_PRESENT_MASK)) | 94 | if (!(root & PT_PRESENT_MASK)) |
89 | goto not_present; | 95 | goto not_present; |
90 | --walker->level; | 96 | --walker->level; |
@@ -96,10 +102,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
96 | walker->level - 1, table_gfn); | 102 | walker->level - 1, table_gfn); |
97 | slot = gfn_to_memslot(vcpu->kvm, table_gfn); | 103 | slot = gfn_to_memslot(vcpu->kvm, table_gfn); |
98 | hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK); | 104 | hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK); |
99 | walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); | 105 | walker->page = pfn_to_page(hpa >> PAGE_SHIFT); |
106 | walker->table = kmap_atomic(walker->page, KM_USER0); | ||
100 | 107 | ||
101 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 108 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
102 | (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0); | 109 | (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); |
103 | 110 | ||
104 | walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; | 111 | walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; |
105 | 112 | ||
@@ -108,6 +115,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
108 | hpa_t paddr; | 115 | hpa_t paddr; |
109 | 116 | ||
110 | ptep = &walker->table[index]; | 117 | ptep = &walker->table[index]; |
118 | walker->index = index; | ||
111 | ASSERT(((unsigned long)walker->table & PAGE_MASK) == | 119 | ASSERT(((unsigned long)walker->table & PAGE_MASK) == |
112 | ((unsigned long)ptep & PAGE_MASK)); | 120 | ((unsigned long)ptep & PAGE_MASK)); |
113 | 121 | ||
@@ -148,16 +156,20 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
148 | 156 | ||
149 | walker->inherited_ar &= walker->table[index]; | 157 | walker->inherited_ar &= walker->table[index]; |
150 | table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; | 158 | table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; |
151 | paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); | ||
152 | kunmap_atomic(walker->table, KM_USER0); | 159 | kunmap_atomic(walker->table, KM_USER0); |
153 | walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT), | 160 | paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT); |
154 | KM_USER0); | 161 | walker->page = pfn_to_page(paddr >> PAGE_SHIFT); |
162 | walker->table = kmap_atomic(walker->page, KM_USER0); | ||
155 | --walker->level; | 163 | --walker->level; |
156 | walker->table_gfn[walker->level - 1 ] = table_gfn; | 164 | walker->table_gfn[walker->level - 1 ] = table_gfn; |
157 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, | 165 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, |
158 | walker->level - 1, table_gfn); | 166 | walker->level - 1, table_gfn); |
159 | } | 167 | } |
160 | walker->ptep = ptep; | 168 | walker->pte = *ptep; |
169 | if (walker->page) | ||
170 | walker->ptep = NULL; | ||
171 | if (walker->table) | ||
172 | kunmap_atomic(walker->table, KM_USER0); | ||
161 | pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); | 173 | pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); |
162 | return 1; | 174 | return 1; |
163 | 175 | ||
@@ -175,13 +187,9 @@ err: | |||
175 | walker->error_code |= PFERR_USER_MASK; | 187 | walker->error_code |= PFERR_USER_MASK; |
176 | if (fetch_fault) | 188 | if (fetch_fault) |
177 | walker->error_code |= PFERR_FETCH_MASK; | 189 | walker->error_code |= PFERR_FETCH_MASK; |
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static void FNAME(release_walker)(struct guest_walker *walker) | ||
182 | { | ||
183 | if (walker->table) | 190 | if (walker->table) |
184 | kunmap_atomic(walker->table, KM_USER0); | 191 | kunmap_atomic(walker->table, KM_USER0); |
192 | return 0; | ||
185 | } | 193 | } |
186 | 194 | ||
187 | static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, | 195 | static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, |
@@ -193,7 +201,7 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, | |||
193 | static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | 201 | static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, |
194 | u64 *shadow_pte, | 202 | u64 *shadow_pte, |
195 | gpa_t gaddr, | 203 | gpa_t gaddr, |
196 | pt_element_t *gpte, | 204 | pt_element_t gpte, |
197 | u64 access_bits, | 205 | u64 access_bits, |
198 | int user_fault, | 206 | int user_fault, |
199 | int write_fault, | 207 | int write_fault, |
@@ -202,23 +210,34 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | |||
202 | gfn_t gfn) | 210 | gfn_t gfn) |
203 | { | 211 | { |
204 | hpa_t paddr; | 212 | hpa_t paddr; |
205 | int dirty = *gpte & PT_DIRTY_MASK; | 213 | int dirty = gpte & PT_DIRTY_MASK; |
206 | u64 spte = *shadow_pte; | 214 | u64 spte = *shadow_pte; |
207 | int was_rmapped = is_rmap_pte(spte); | 215 | int was_rmapped = is_rmap_pte(spte); |
208 | 216 | ||
209 | pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" | 217 | pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" |
210 | " user_fault %d gfn %lx\n", | 218 | " user_fault %d gfn %lx\n", |
211 | __FUNCTION__, spte, (u64)*gpte, access_bits, | 219 | __FUNCTION__, spte, (u64)gpte, access_bits, |
212 | write_fault, user_fault, gfn); | 220 | write_fault, user_fault, gfn); |
213 | 221 | ||
214 | if (write_fault && !dirty) { | 222 | if (write_fault && !dirty) { |
215 | *gpte |= PT_DIRTY_MASK; | 223 | pt_element_t *guest_ent, *tmp = NULL; |
224 | |||
225 | if (walker->ptep) | ||
226 | guest_ent = walker->ptep; | ||
227 | else { | ||
228 | tmp = kmap_atomic(walker->page, KM_USER0); | ||
229 | guest_ent = &tmp[walker->index]; | ||
230 | } | ||
231 | |||
232 | *guest_ent |= PT_DIRTY_MASK; | ||
233 | if (!walker->ptep) | ||
234 | kunmap_atomic(tmp, KM_USER0); | ||
216 | dirty = 1; | 235 | dirty = 1; |
217 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | 236 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); |
218 | } | 237 | } |
219 | 238 | ||
220 | spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; | 239 | spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; |
221 | spte |= *gpte & PT64_NX_MASK; | 240 | spte |= gpte & PT64_NX_MASK; |
222 | if (!dirty) | 241 | if (!dirty) |
223 | access_bits &= ~PT_WRITABLE_MASK; | 242 | access_bits &= ~PT_WRITABLE_MASK; |
224 | 243 | ||
@@ -255,7 +274,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | |||
255 | access_bits &= ~PT_WRITABLE_MASK; | 274 | access_bits &= ~PT_WRITABLE_MASK; |
256 | if (is_writeble_pte(spte)) { | 275 | if (is_writeble_pte(spte)) { |
257 | spte &= ~PT_WRITABLE_MASK; | 276 | spte &= ~PT_WRITABLE_MASK; |
258 | kvm_arch_ops->tlb_flush(vcpu); | 277 | kvm_x86_ops->tlb_flush(vcpu); |
259 | } | 278 | } |
260 | if (write_fault) | 279 | if (write_fault) |
261 | *ptwrite = 1; | 280 | *ptwrite = 1; |
@@ -273,13 +292,13 @@ unshadowed: | |||
273 | rmap_add(vcpu, shadow_pte); | 292 | rmap_add(vcpu, shadow_pte); |
274 | } | 293 | } |
275 | 294 | ||
276 | static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, | 295 | static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte, |
277 | u64 *shadow_pte, u64 access_bits, | 296 | u64 *shadow_pte, u64 access_bits, |
278 | int user_fault, int write_fault, int *ptwrite, | 297 | int user_fault, int write_fault, int *ptwrite, |
279 | struct guest_walker *walker, gfn_t gfn) | 298 | struct guest_walker *walker, gfn_t gfn) |
280 | { | 299 | { |
281 | access_bits &= *gpte; | 300 | access_bits &= gpte; |
282 | FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, | 301 | FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK, |
283 | gpte, access_bits, user_fault, write_fault, | 302 | gpte, access_bits, user_fault, write_fault, |
284 | ptwrite, walker, gfn); | 303 | ptwrite, walker, gfn); |
285 | } | 304 | } |
@@ -295,22 +314,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
295 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) | 314 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) |
296 | return; | 315 | return; |
297 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); | 316 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); |
298 | FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, | 317 | FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, |
299 | 0, NULL, NULL, | 318 | 0, NULL, NULL, |
300 | (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); | 319 | (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); |
301 | } | 320 | } |
302 | 321 | ||
303 | static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, | 322 | static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde, |
304 | u64 *shadow_pte, u64 access_bits, | 323 | u64 *shadow_pte, u64 access_bits, |
305 | int user_fault, int write_fault, int *ptwrite, | 324 | int user_fault, int write_fault, int *ptwrite, |
306 | struct guest_walker *walker, gfn_t gfn) | 325 | struct guest_walker *walker, gfn_t gfn) |
307 | { | 326 | { |
308 | gpa_t gaddr; | 327 | gpa_t gaddr; |
309 | 328 | ||
310 | access_bits &= *gpde; | 329 | access_bits &= gpde; |
311 | gaddr = (gpa_t)gfn << PAGE_SHIFT; | 330 | gaddr = (gpa_t)gfn << PAGE_SHIFT; |
312 | if (PTTYPE == 32 && is_cpuid_PSE36()) | 331 | if (PTTYPE == 32 && is_cpuid_PSE36()) |
313 | gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << | 332 | gaddr |= (gpde & PT32_DIR_PSE36_MASK) << |
314 | (32 - PT32_DIR_PSE36_SHIFT); | 333 | (32 - PT32_DIR_PSE36_SHIFT); |
315 | FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, | 334 | FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, |
316 | gpde, access_bits, user_fault, write_fault, | 335 | gpde, access_bits, user_fault, write_fault, |
@@ -328,9 +347,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
328 | int level; | 347 | int level; |
329 | u64 *shadow_ent; | 348 | u64 *shadow_ent; |
330 | u64 *prev_shadow_ent = NULL; | 349 | u64 *prev_shadow_ent = NULL; |
331 | pt_element_t *guest_ent = walker->ptep; | ||
332 | 350 | ||
333 | if (!is_present_pte(*guest_ent)) | 351 | if (!is_present_pte(walker->pte)) |
334 | return NULL; | 352 | return NULL; |
335 | 353 | ||
336 | shadow_addr = vcpu->mmu.root_hpa; | 354 | shadow_addr = vcpu->mmu.root_hpa; |
@@ -364,12 +382,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
364 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 382 | if (level - 1 == PT_PAGE_TABLE_LEVEL |
365 | && walker->level == PT_DIRECTORY_LEVEL) { | 383 | && walker->level == PT_DIRECTORY_LEVEL) { |
366 | metaphysical = 1; | 384 | metaphysical = 1; |
367 | hugepage_access = *guest_ent; | 385 | hugepage_access = walker->pte; |
368 | hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; | 386 | hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; |
369 | if (*guest_ent & PT64_NX_MASK) | 387 | if (walker->pte & PT64_NX_MASK) |
370 | hugepage_access |= (1 << 2); | 388 | hugepage_access |= (1 << 2); |
371 | hugepage_access >>= PT_WRITABLE_SHIFT; | 389 | hugepage_access >>= PT_WRITABLE_SHIFT; |
372 | table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) | 390 | table_gfn = (walker->pte & PT_BASE_ADDR_MASK) |
373 | >> PAGE_SHIFT; | 391 | >> PAGE_SHIFT; |
374 | } else { | 392 | } else { |
375 | metaphysical = 0; | 393 | metaphysical = 0; |
@@ -386,12 +404,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
386 | } | 404 | } |
387 | 405 | ||
388 | if (walker->level == PT_DIRECTORY_LEVEL) { | 406 | if (walker->level == PT_DIRECTORY_LEVEL) { |
389 | FNAME(set_pde)(vcpu, guest_ent, shadow_ent, | 407 | FNAME(set_pde)(vcpu, walker->pte, shadow_ent, |
390 | walker->inherited_ar, user_fault, write_fault, | 408 | walker->inherited_ar, user_fault, write_fault, |
391 | ptwrite, walker, walker->gfn); | 409 | ptwrite, walker, walker->gfn); |
392 | } else { | 410 | } else { |
393 | ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); | 411 | ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); |
394 | FNAME(set_pte)(vcpu, guest_ent, shadow_ent, | 412 | FNAME(set_pte)(vcpu, walker->pte, shadow_ent, |
395 | walker->inherited_ar, user_fault, write_fault, | 413 | walker->inherited_ar, user_fault, write_fault, |
396 | ptwrite, walker, walker->gfn); | 414 | ptwrite, walker, walker->gfn); |
397 | } | 415 | } |
@@ -442,7 +460,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
442 | if (!r) { | 460 | if (!r) { |
443 | pgprintk("%s: guest page fault\n", __FUNCTION__); | 461 | pgprintk("%s: guest page fault\n", __FUNCTION__); |
444 | inject_page_fault(vcpu, addr, walker.error_code); | 462 | inject_page_fault(vcpu, addr, walker.error_code); |
445 | FNAME(release_walker)(&walker); | ||
446 | vcpu->last_pt_write_count = 0; /* reset fork detector */ | 463 | vcpu->last_pt_write_count = 0; /* reset fork detector */ |
447 | return 0; | 464 | return 0; |
448 | } | 465 | } |
@@ -452,8 +469,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
452 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, | 469 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, |
453 | shadow_pte, *shadow_pte, write_pt); | 470 | shadow_pte, *shadow_pte, write_pt); |
454 | 471 | ||
455 | FNAME(release_walker)(&walker); | ||
456 | |||
457 | if (!write_pt) | 472 | if (!write_pt) |
458 | vcpu->last_pt_write_count = 0; /* reset fork detector */ | 473 | vcpu->last_pt_write_count = 0; /* reset fork detector */ |
459 | 474 | ||
@@ -482,7 +497,6 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | |||
482 | gpa |= vaddr & ~PAGE_MASK; | 497 | gpa |= vaddr & ~PAGE_MASK; |
483 | } | 498 | } |
484 | 499 | ||
485 | FNAME(release_walker)(&walker); | ||
486 | return gpa; | 500 | return gpa; |
487 | } | 501 | } |
488 | 502 | ||
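The paging_tmpl.h changes above stop keeping the guest page table kmapped across the whole walk: the walker now caches the pte value plus the page and index it came from, and set_pte_common() remaps the page only for the brief window in which it sets the dirty bit. A minimal sketch of that map-touch-unmap pattern, with simplified stand-in types (the real code uses struct guest_walker and PT_DIRTY_MASK):

/*
 * Sketch only -- not from the patch.  Illustrates caching the pte value
 * and remapping the backing page just long enough to update it.
 */
#include <linux/highmem.h>
#include <linux/types.h>

struct walker_sketch {
	u64 pte;		/* cached copy of the guest pte */
	struct page *page;	/* page table page, not kept mapped */
	unsigned int index;	/* pte index within that page */
};

static void sketch_mark_dirty(struct walker_sketch *w)
{
	u64 *table = kmap_atomic(w->page, KM_USER0);	/* map only here */

	table[w->index] |= (1ULL << 6);	/* stand-in for PT_DIRTY_MASK */
	w->pte = table[w->index];	/* keep the cached value in sync */
	kunmap_atomic(table, KM_USER0);	/* drop the mapping right away */
}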
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index bc818cc126e3..729f1cd93606 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c | |||
@@ -16,12 +16,12 @@ | |||
16 | 16 | ||
17 | #include "kvm_svm.h" | 17 | #include "kvm_svm.h" |
18 | #include "x86_emulate.h" | 18 | #include "x86_emulate.h" |
19 | #include "irq.h" | ||
19 | 20 | ||
20 | #include <linux/module.h> | 21 | #include <linux/module.h> |
21 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
22 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
23 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
24 | #include <linux/profile.h> | ||
25 | #include <linux/sched.h> | 25 | #include <linux/sched.h> |
26 | 26 | ||
27 | #include <asm/desc.h> | 27 | #include <asm/desc.h> |
@@ -38,7 +38,6 @@ MODULE_LICENSE("GPL"); | |||
38 | 38 | ||
39 | #define DR7_GD_MASK (1 << 13) | 39 | #define DR7_GD_MASK (1 << 13) |
40 | #define DR6_BD_MASK (1 << 13) | 40 | #define DR6_BD_MASK (1 << 13) |
41 | #define CR4_DE_MASK (1UL << 3) | ||
42 | 41 | ||
43 | #define SEG_TYPE_LDT 2 | 42 | #define SEG_TYPE_LDT 2 |
44 | #define SEG_TYPE_BUSY_TSS16 3 | 43 | #define SEG_TYPE_BUSY_TSS16 3 |
@@ -50,6 +49,13 @@ MODULE_LICENSE("GPL"); | |||
50 | #define SVM_FEATURE_LBRV (1 << 1) | 49 | #define SVM_FEATURE_LBRV (1 << 1) |
51 | #define SVM_DEATURE_SVML (1 << 2) | 50 | #define SVM_DEATURE_SVML (1 << 2) |
52 | 51 | ||
52 | static void kvm_reput_irq(struct vcpu_svm *svm); | ||
53 | |||
54 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | ||
55 | { | ||
56 | return container_of(vcpu, struct vcpu_svm, vcpu); | ||
57 | } | ||
58 | |||
53 | unsigned long iopm_base; | 59 | unsigned long iopm_base; |
54 | unsigned long msrpm_base; | 60 | unsigned long msrpm_base; |
55 | 61 | ||
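The new to_svm() helper above relies on struct kvm_vcpu being embedded inside struct vcpu_svm, so the containing structure can be recovered with container_of() instead of chasing a separate vcpu->svm pointer. A minimal sketch of that embedding pattern (field names here are illustrative, not the real layout):

#include <linux/kernel.h>	/* container_of() */

struct inner_sketch {
	int cpu;
};

struct outer_sketch {
	struct inner_sketch inner;	/* embedded by value, not a pointer */
	unsigned long private_state;
};

static inline struct outer_sketch *to_outer(struct inner_sketch *inner)
{
	/* subtract offsetof(struct outer_sketch, inner) from the pointer */
	return container_of(inner, struct outer_sketch, inner);
}

Because the private state is allocated together with the generic vcpu, later hunks can also drop the NULL checks that guarded the old vcpu->svm pointer.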
@@ -94,20 +100,6 @@ static inline u32 svm_has(u32 feat) | |||
94 | return svm_features & feat; | 100 | return svm_features & feat; |
95 | } | 101 | } |
96 | 102 | ||
97 | static unsigned get_addr_size(struct kvm_vcpu *vcpu) | ||
98 | { | ||
99 | struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; | ||
100 | u16 cs_attrib; | ||
101 | |||
102 | if (!(sa->cr0 & CR0_PE_MASK) || (sa->rflags & X86_EFLAGS_VM)) | ||
103 | return 2; | ||
104 | |||
105 | cs_attrib = sa->cs.attrib; | ||
106 | |||
107 | return (cs_attrib & SVM_SELECTOR_L_MASK) ? 8 : | ||
108 | (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2; | ||
109 | } | ||
110 | |||
111 | static inline u8 pop_irq(struct kvm_vcpu *vcpu) | 103 | static inline u8 pop_irq(struct kvm_vcpu *vcpu) |
112 | { | 104 | { |
113 | int word_index = __ffs(vcpu->irq_summary); | 105 | int word_index = __ffs(vcpu->irq_summary); |
@@ -182,7 +174,7 @@ static inline void write_dr7(unsigned long val) | |||
182 | 174 | ||
183 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 175 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
184 | { | 176 | { |
185 | vcpu->svm->asid_generation--; | 177 | to_svm(vcpu)->asid_generation--; |
186 | } | 178 | } |
187 | 179 | ||
188 | static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | 180 | static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) |
@@ -195,22 +187,24 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
195 | if (!(efer & KVM_EFER_LMA)) | 187 | if (!(efer & KVM_EFER_LMA)) |
196 | efer &= ~KVM_EFER_LME; | 188 | efer &= ~KVM_EFER_LME; |
197 | 189 | ||
198 | vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; | 190 | to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; |
199 | vcpu->shadow_efer = efer; | 191 | vcpu->shadow_efer = efer; |
200 | } | 192 | } |
201 | 193 | ||
202 | static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) | 194 | static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) |
203 | { | 195 | { |
204 | vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | | 196 | struct vcpu_svm *svm = to_svm(vcpu); |
197 | |||
198 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | | ||
205 | SVM_EVTINJ_VALID_ERR | | 199 | SVM_EVTINJ_VALID_ERR | |
206 | SVM_EVTINJ_TYPE_EXEPT | | 200 | SVM_EVTINJ_TYPE_EXEPT | |
207 | GP_VECTOR; | 201 | GP_VECTOR; |
208 | vcpu->svm->vmcb->control.event_inj_err = error_code; | 202 | svm->vmcb->control.event_inj_err = error_code; |
209 | } | 203 | } |
210 | 204 | ||
211 | static void inject_ud(struct kvm_vcpu *vcpu) | 205 | static void inject_ud(struct kvm_vcpu *vcpu) |
212 | { | 206 | { |
213 | vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | | 207 | to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | |
214 | SVM_EVTINJ_TYPE_EXEPT | | 208 | SVM_EVTINJ_TYPE_EXEPT | |
215 | UD_VECTOR; | 209 | UD_VECTOR; |
216 | } | 210 | } |
@@ -229,19 +223,21 @@ static int is_external_interrupt(u32 info) | |||
229 | 223 | ||
230 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | 224 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) |
231 | { | 225 | { |
232 | if (!vcpu->svm->next_rip) { | 226 | struct vcpu_svm *svm = to_svm(vcpu); |
227 | |||
228 | if (!svm->next_rip) { | ||
233 | printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); | 229 | printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); |
234 | return; | 230 | return; |
235 | } | 231 | } |
236 | if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) { | 232 | if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) { |
237 | printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", | 233 | printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", |
238 | __FUNCTION__, | 234 | __FUNCTION__, |
239 | vcpu->svm->vmcb->save.rip, | 235 | svm->vmcb->save.rip, |
240 | vcpu->svm->next_rip); | 236 | svm->next_rip); |
241 | } | 237 | } |
242 | 238 | ||
243 | vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; | 239 | vcpu->rip = svm->vmcb->save.rip = svm->next_rip; |
244 | vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | 240 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; |
245 | 241 | ||
246 | vcpu->interrupt_window_open = 1; | 242 | vcpu->interrupt_window_open = 1; |
247 | } | 243 | } |
@@ -351,8 +347,8 @@ err_1: | |||
351 | 347 | ||
352 | } | 348 | } |
353 | 349 | ||
354 | static int set_msr_interception(u32 *msrpm, unsigned msr, | 350 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
355 | int read, int write) | 351 | int read, int write) |
356 | { | 352 | { |
357 | int i; | 353 | int i; |
358 | 354 | ||
@@ -367,11 +363,10 @@ static int set_msr_interception(u32 *msrpm, unsigned msr, | |||
367 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 363 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); |
368 | *base = (*base & ~(0x3 << msr_shift)) | | 364 | *base = (*base & ~(0x3 << msr_shift)) | |
369 | (mask << msr_shift); | 365 | (mask << msr_shift); |
370 | return 1; | 366 | return; |
371 | } | 367 | } |
372 | } | 368 | } |
373 | printk(KERN_DEBUG "%s: not found 0x%x\n", __FUNCTION__, msr); | 369 | BUG(); |
374 | return 0; | ||
375 | } | 370 | } |
376 | 371 | ||
377 | static __init int svm_hardware_setup(void) | 372 | static __init int svm_hardware_setup(void) |
@@ -382,8 +377,6 @@ static __init int svm_hardware_setup(void) | |||
382 | void *iopm_va, *msrpm_va; | 377 | void *iopm_va, *msrpm_va; |
383 | int r; | 378 | int r; |
384 | 379 | ||
385 | kvm_emulator_want_group7_invlpg(); | ||
386 | |||
387 | iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); | 380 | iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); |
388 | 381 | ||
389 | if (!iopm_pages) | 382 | if (!iopm_pages) |
@@ -458,11 +451,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) | |||
458 | seg->base = 0; | 451 | seg->base = 0; |
459 | } | 452 | } |
460 | 453 | ||
461 | static int svm_vcpu_setup(struct kvm_vcpu *vcpu) | ||
462 | { | ||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | static void init_vmcb(struct vmcb *vmcb) | 454 | static void init_vmcb(struct vmcb *vmcb) |
467 | { | 455 | { |
468 | struct vmcb_control_area *control = &vmcb->control; | 456 | struct vmcb_control_area *control = &vmcb->control; |
@@ -563,59 +551,83 @@ static void init_vmcb(struct vmcb *vmcb) | |||
563 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 551 | * cr0 val on cpu init should be 0x60000010, we enable cpu |
564 | * cache by default. the orderly way is to enable cache in bios. | 552 | * cache by default. the orderly way is to enable cache in bios. |
565 | */ | 553 | */ |
566 | save->cr0 = 0x00000010 | CR0_PG_MASK | CR0_WP_MASK; | 554 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; |
567 | save->cr4 = CR4_PAE_MASK; | 555 | save->cr4 = X86_CR4_PAE; |
568 | /* rdx = ?? */ | 556 | /* rdx = ?? */ |
569 | } | 557 | } |
570 | 558 | ||
571 | static int svm_create_vcpu(struct kvm_vcpu *vcpu) | 559 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu) |
560 | { | ||
561 | struct vcpu_svm *svm = to_svm(vcpu); | ||
562 | |||
563 | init_vmcb(svm->vmcb); | ||
564 | } | ||
565 | |||
566 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | ||
572 | { | 567 | { |
568 | struct vcpu_svm *svm; | ||
573 | struct page *page; | 569 | struct page *page; |
574 | int r; | 570 | int err; |
571 | |||
572 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | ||
573 | if (!svm) { | ||
574 | err = -ENOMEM; | ||
575 | goto out; | ||
576 | } | ||
577 | |||
578 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); | ||
579 | if (err) | ||
580 | goto free_svm; | ||
581 | |||
582 | if (irqchip_in_kernel(kvm)) { | ||
583 | err = kvm_create_lapic(&svm->vcpu); | ||
584 | if (err < 0) | ||
585 | goto free_svm; | ||
586 | } | ||
575 | 587 | ||
576 | r = -ENOMEM; | ||
577 | vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL); | ||
578 | if (!vcpu->svm) | ||
579 | goto out1; | ||
580 | page = alloc_page(GFP_KERNEL); | 588 | page = alloc_page(GFP_KERNEL); |
581 | if (!page) | 589 | if (!page) { |
582 | goto out2; | 590 | err = -ENOMEM; |
583 | 591 | goto uninit; | |
584 | vcpu->svm->vmcb = page_address(page); | 592 | } |
585 | clear_page(vcpu->svm->vmcb); | ||
586 | vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | ||
587 | vcpu->svm->asid_generation = 0; | ||
588 | memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); | ||
589 | init_vmcb(vcpu->svm->vmcb); | ||
590 | |||
591 | fx_init(vcpu); | ||
592 | vcpu->fpu_active = 1; | ||
593 | vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | ||
594 | if (vcpu == &vcpu->kvm->vcpus[0]) | ||
595 | vcpu->apic_base |= MSR_IA32_APICBASE_BSP; | ||
596 | 593 | ||
597 | return 0; | 594 | svm->vmcb = page_address(page); |
595 | clear_page(svm->vmcb); | ||
596 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | ||
597 | svm->asid_generation = 0; | ||
598 | memset(svm->db_regs, 0, sizeof(svm->db_regs)); | ||
599 | init_vmcb(svm->vmcb); | ||
598 | 600 | ||
599 | out2: | 601 | fx_init(&svm->vcpu); |
600 | kfree(vcpu->svm); | 602 | svm->vcpu.fpu_active = 1; |
601 | out1: | 603 | svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
602 | return r; | 604 | if (svm->vcpu.vcpu_id == 0) |
605 | svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; | ||
606 | |||
607 | return &svm->vcpu; | ||
608 | |||
609 | uninit: | ||
610 | kvm_vcpu_uninit(&svm->vcpu); | ||
611 | free_svm: | ||
612 | kmem_cache_free(kvm_vcpu_cache, svm); | ||
613 | out: | ||
614 | return ERR_PTR(err); | ||
603 | } | 615 | } |
604 | 616 | ||
605 | static void svm_free_vcpu(struct kvm_vcpu *vcpu) | 617 | static void svm_free_vcpu(struct kvm_vcpu *vcpu) |
606 | { | 618 | { |
607 | if (!vcpu->svm) | 619 | struct vcpu_svm *svm = to_svm(vcpu); |
608 | return; | 620 | |
609 | if (vcpu->svm->vmcb) | 621 | __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); |
610 | __free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT)); | 622 | kvm_vcpu_uninit(vcpu); |
611 | kfree(vcpu->svm); | 623 | kmem_cache_free(kvm_vcpu_cache, svm); |
612 | } | 624 | } |
613 | 625 | ||
614 | static void svm_vcpu_load(struct kvm_vcpu *vcpu) | 626 | static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
615 | { | 627 | { |
616 | int cpu, i; | 628 | struct vcpu_svm *svm = to_svm(vcpu); |
629 | int i; | ||
617 | 630 | ||
618 | cpu = get_cpu(); | ||
619 | if (unlikely(cpu != vcpu->cpu)) { | 631 | if (unlikely(cpu != vcpu->cpu)) { |
620 | u64 tsc_this, delta; | 632 | u64 tsc_this, delta; |
621 | 633 | ||
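svm_create_vcpu() above now allocates the whole struct vcpu_svm, initializes it step by step, and unwinds with labelled gotos on failure, returning the error as an ERR_PTR(). A minimal sketch of that shape (struct widget and its helpers are hypothetical):

#include <linux/err.h>
#include <linux/slab.h>

struct widget { int id; };

static int widget_init(struct widget *w)
{
	w->id = 0;
	return 0;
}

static struct widget *widget_create(void)
{
	struct widget *w;
	int err;

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w) {
		err = -ENOMEM;
		goto out;
	}

	err = widget_init(w);		/* analogous to kvm_vcpu_init() */
	if (err)
		goto free_widget;

	return w;			/* success: hand back the object */

free_widget:
	kfree(w);			/* undo only what already succeeded */
out:
	return ERR_PTR(err);		/* callers check with IS_ERR() */
}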
@@ -625,23 +637,24 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu) | |||
625 | */ | 637 | */ |
626 | rdtscll(tsc_this); | 638 | rdtscll(tsc_this); |
627 | delta = vcpu->host_tsc - tsc_this; | 639 | delta = vcpu->host_tsc - tsc_this; |
628 | vcpu->svm->vmcb->control.tsc_offset += delta; | 640 | svm->vmcb->control.tsc_offset += delta; |
629 | vcpu->cpu = cpu; | 641 | vcpu->cpu = cpu; |
642 | kvm_migrate_apic_timer(vcpu); | ||
630 | } | 643 | } |
631 | 644 | ||
632 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 645 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
633 | rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); | 646 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
634 | } | 647 | } |
635 | 648 | ||
636 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) | 649 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) |
637 | { | 650 | { |
651 | struct vcpu_svm *svm = to_svm(vcpu); | ||
638 | int i; | 652 | int i; |
639 | 653 | ||
640 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 654 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
641 | wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); | 655 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
642 | 656 | ||
643 | rdtscll(vcpu->host_tsc); | 657 | rdtscll(vcpu->host_tsc); |
644 | put_cpu(); | ||
645 | } | 658 | } |
646 | 659 | ||
647 | static void svm_vcpu_decache(struct kvm_vcpu *vcpu) | 660 | static void svm_vcpu_decache(struct kvm_vcpu *vcpu) |
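The delta arithmetic in svm_vcpu_load() above exists because each physical CPU has its own TSC: when a vcpu migrates, the offset added to the host TSC is bumped by the difference between the TSC saved at svm_vcpu_put() time and the new CPU's TSC, so the guest-visible counter never jumps backwards. A small worked example with made-up values:

#include <linux/types.h>

static u64 guest_tsc_after_migration(void)
{
	u64 tsc_offset   = 1000;	/* guest_tsc = host_tsc + tsc_offset */
	u64 host_tsc_old = 500000;	/* saved by svm_vcpu_put()           */
	u64 host_tsc_new = 200000;	/* new CPU's TSC, behind the old one */
	u64 delta        = host_tsc_old - host_tsc_new;

	tsc_offset += delta;		/* the adjustment made above */

	/* 200000 + 301000 = 501000, exactly where the guest left off */
	return host_tsc_new + tsc_offset;
}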
@@ -650,31 +663,34 @@ static void svm_vcpu_decache(struct kvm_vcpu *vcpu) | |||
650 | 663 | ||
651 | static void svm_cache_regs(struct kvm_vcpu *vcpu) | 664 | static void svm_cache_regs(struct kvm_vcpu *vcpu) |
652 | { | 665 | { |
653 | vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax; | 666 | struct vcpu_svm *svm = to_svm(vcpu); |
654 | vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp; | 667 | |
655 | vcpu->rip = vcpu->svm->vmcb->save.rip; | 668 | vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; |
669 | vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
670 | vcpu->rip = svm->vmcb->save.rip; | ||
656 | } | 671 | } |
657 | 672 | ||
658 | static void svm_decache_regs(struct kvm_vcpu *vcpu) | 673 | static void svm_decache_regs(struct kvm_vcpu *vcpu) |
659 | { | 674 | { |
660 | vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; | 675 | struct vcpu_svm *svm = to_svm(vcpu); |
661 | vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; | 676 | svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; |
662 | vcpu->svm->vmcb->save.rip = vcpu->rip; | 677 | svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; |
678 | svm->vmcb->save.rip = vcpu->rip; | ||
663 | } | 679 | } |
664 | 680 | ||
665 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 681 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
666 | { | 682 | { |
667 | return vcpu->svm->vmcb->save.rflags; | 683 | return to_svm(vcpu)->vmcb->save.rflags; |
668 | } | 684 | } |
669 | 685 | ||
670 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 686 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
671 | { | 687 | { |
672 | vcpu->svm->vmcb->save.rflags = rflags; | 688 | to_svm(vcpu)->vmcb->save.rflags = rflags; |
673 | } | 689 | } |
674 | 690 | ||
675 | static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) | 691 | static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) |
676 | { | 692 | { |
677 | struct vmcb_save_area *save = &vcpu->svm->vmcb->save; | 693 | struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; |
678 | 694 | ||
679 | switch (seg) { | 695 | switch (seg) { |
680 | case VCPU_SREG_CS: return &save->cs; | 696 | case VCPU_SREG_CS: return &save->cs; |
@@ -716,36 +732,36 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
716 | var->unusable = !var->present; | 732 | var->unusable = !var->present; |
717 | } | 733 | } |
718 | 734 | ||
719 | static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | ||
720 | { | ||
721 | struct vmcb_seg *s = svm_seg(vcpu, VCPU_SREG_CS); | ||
722 | |||
723 | *db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | ||
724 | *l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; | ||
725 | } | ||
726 | |||
727 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 735 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) |
728 | { | 736 | { |
729 | dt->limit = vcpu->svm->vmcb->save.idtr.limit; | 737 | struct vcpu_svm *svm = to_svm(vcpu); |
730 | dt->base = vcpu->svm->vmcb->save.idtr.base; | 738 | |
739 | dt->limit = svm->vmcb->save.idtr.limit; | ||
740 | dt->base = svm->vmcb->save.idtr.base; | ||
731 | } | 741 | } |
732 | 742 | ||
733 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 743 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) |
734 | { | 744 | { |
735 | vcpu->svm->vmcb->save.idtr.limit = dt->limit; | 745 | struct vcpu_svm *svm = to_svm(vcpu); |
736 | vcpu->svm->vmcb->save.idtr.base = dt->base ; | 746 | |
747 | svm->vmcb->save.idtr.limit = dt->limit; | ||
748 | svm->vmcb->save.idtr.base = dt->base ; | ||
737 | } | 749 | } |
738 | 750 | ||
739 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 751 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) |
740 | { | 752 | { |
741 | dt->limit = vcpu->svm->vmcb->save.gdtr.limit; | 753 | struct vcpu_svm *svm = to_svm(vcpu); |
742 | dt->base = vcpu->svm->vmcb->save.gdtr.base; | 754 | |
755 | dt->limit = svm->vmcb->save.gdtr.limit; | ||
756 | dt->base = svm->vmcb->save.gdtr.base; | ||
743 | } | 757 | } |
744 | 758 | ||
745 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 759 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) |
746 | { | 760 | { |
747 | vcpu->svm->vmcb->save.gdtr.limit = dt->limit; | 761 | struct vcpu_svm *svm = to_svm(vcpu); |
748 | vcpu->svm->vmcb->save.gdtr.base = dt->base ; | 762 | |
763 | svm->vmcb->save.gdtr.limit = dt->limit; | ||
764 | svm->vmcb->save.gdtr.base = dt->base ; | ||
749 | } | 765 | } |
750 | 766 | ||
751 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 767 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
@@ -754,39 +770,42 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
754 | 770 | ||
755 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 771 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
756 | { | 772 | { |
773 | struct vcpu_svm *svm = to_svm(vcpu); | ||
774 | |||
757 | #ifdef CONFIG_X86_64 | 775 | #ifdef CONFIG_X86_64 |
758 | if (vcpu->shadow_efer & KVM_EFER_LME) { | 776 | if (vcpu->shadow_efer & KVM_EFER_LME) { |
759 | if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { | 777 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
760 | vcpu->shadow_efer |= KVM_EFER_LMA; | 778 | vcpu->shadow_efer |= KVM_EFER_LMA; |
761 | vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; | 779 | svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; |
762 | } | 780 | } |
763 | 781 | ||
764 | if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK) ) { | 782 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) { |
765 | vcpu->shadow_efer &= ~KVM_EFER_LMA; | 783 | vcpu->shadow_efer &= ~KVM_EFER_LMA; |
766 | vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); | 784 | svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); |
767 | } | 785 | } |
768 | } | 786 | } |
769 | #endif | 787 | #endif |
770 | if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { | 788 | if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { |
771 | vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 789 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
772 | vcpu->fpu_active = 1; | 790 | vcpu->fpu_active = 1; |
773 | } | 791 | } |
774 | 792 | ||
775 | vcpu->cr0 = cr0; | 793 | vcpu->cr0 = cr0; |
776 | cr0 |= CR0_PG_MASK | CR0_WP_MASK; | 794 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
777 | cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); | 795 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
778 | vcpu->svm->vmcb->save.cr0 = cr0; | 796 | svm->vmcb->save.cr0 = cr0; |
779 | } | 797 | } |
780 | 798 | ||
781 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 799 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
782 | { | 800 | { |
783 | vcpu->cr4 = cr4; | 801 | vcpu->cr4 = cr4; |
784 | vcpu->svm->vmcb->save.cr4 = cr4 | CR4_PAE_MASK; | 802 | to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; |
785 | } | 803 | } |
786 | 804 | ||
787 | static void svm_set_segment(struct kvm_vcpu *vcpu, | 805 | static void svm_set_segment(struct kvm_vcpu *vcpu, |
788 | struct kvm_segment *var, int seg) | 806 | struct kvm_segment *var, int seg) |
789 | { | 807 | { |
808 | struct vcpu_svm *svm = to_svm(vcpu); | ||
790 | struct vmcb_seg *s = svm_seg(vcpu, seg); | 809 | struct vmcb_seg *s = svm_seg(vcpu, seg); |
791 | 810 | ||
792 | s->base = var->base; | 811 | s->base = var->base; |
@@ -805,16 +824,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
805 | s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; | 824 | s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; |
806 | } | 825 | } |
807 | if (seg == VCPU_SREG_CS) | 826 | if (seg == VCPU_SREG_CS) |
808 | vcpu->svm->vmcb->save.cpl | 827 | svm->vmcb->save.cpl |
809 | = (vcpu->svm->vmcb->save.cs.attrib | 828 | = (svm->vmcb->save.cs.attrib |
810 | >> SVM_SELECTOR_DPL_SHIFT) & 3; | 829 | >> SVM_SELECTOR_DPL_SHIFT) & 3; |
811 | 830 | ||
812 | } | 831 | } |
813 | 832 | ||
814 | /* FIXME: | 833 | /* FIXME: |
815 | 834 | ||
816 | vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 835 | svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK; |
817 | vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); | 836 | svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); |
818 | 837 | ||
819 | */ | 838 | */ |
820 | 839 | ||
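The X86_CR0_TS handling in svm_set_cr0() above is part of lazy FPU switching: while the guest's FPU state is not loaded, TS stays set and #NM is intercepted; when the guest clears TS here, or when the first FPU instruction traps into nm_interception() further down, the intercept is dropped and fpu_active is set. A minimal sketch of the #NM side (masks and fields are simplified stand-ins):

struct fpu_state_sketch {
	int fpu_active;			/* is the guest FPU loaded right now? */
	unsigned long cr0;
	unsigned long intercept_exceptions;
};

#define SKETCH_CR0_TS	(1UL << 3)
#define SKETCH_NM_BIT	(1UL << 7)	/* stand-in for (1 << NM_VECTOR) */

static void sketch_handle_nm(struct fpu_state_sketch *v)
{
	v->intercept_exceptions &= ~SKETCH_NM_BIT;	/* stop trapping #NM  */
	v->cr0 &= ~SKETCH_CR0_TS;			/* let FPU insns run  */
	v->fpu_active = 1;				/* guest FPU now live */
}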
@@ -823,61 +842,68 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
823 | return -EOPNOTSUPP; | 842 | return -EOPNOTSUPP; |
824 | } | 843 | } |
825 | 844 | ||
845 | static int svm_get_irq(struct kvm_vcpu *vcpu) | ||
846 | { | ||
847 | struct vcpu_svm *svm = to_svm(vcpu); | ||
848 | u32 exit_int_info = svm->vmcb->control.exit_int_info; | ||
849 | |||
850 | if (is_external_interrupt(exit_int_info)) | ||
851 | return exit_int_info & SVM_EVTINJ_VEC_MASK; | ||
852 | return -1; | ||
853 | } | ||
854 | |||
826 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 855 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
827 | { | 856 | { |
828 | #ifdef CONFIG_X86_64 | 857 | #ifdef CONFIG_X86_64 |
829 | wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); | 858 | wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); |
830 | #endif | 859 | #endif |
831 | } | 860 | } |
832 | 861 | ||
833 | static void save_host_msrs(struct kvm_vcpu *vcpu) | 862 | static void save_host_msrs(struct kvm_vcpu *vcpu) |
834 | { | 863 | { |
835 | #ifdef CONFIG_X86_64 | 864 | #ifdef CONFIG_X86_64 |
836 | rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); | 865 | rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); |
837 | #endif | 866 | #endif |
838 | } | 867 | } |
839 | 868 | ||
840 | static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) | 869 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) |
841 | { | 870 | { |
842 | if (svm_data->next_asid > svm_data->max_asid) { | 871 | if (svm_data->next_asid > svm_data->max_asid) { |
843 | ++svm_data->asid_generation; | 872 | ++svm_data->asid_generation; |
844 | svm_data->next_asid = 1; | 873 | svm_data->next_asid = 1; |
845 | vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; | 874 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; |
846 | } | 875 | } |
847 | 876 | ||
848 | vcpu->cpu = svm_data->cpu; | 877 | svm->vcpu.cpu = svm_data->cpu; |
849 | vcpu->svm->asid_generation = svm_data->asid_generation; | 878 | svm->asid_generation = svm_data->asid_generation; |
850 | vcpu->svm->vmcb->control.asid = svm_data->next_asid++; | 879 | svm->vmcb->control.asid = svm_data->next_asid++; |
851 | } | ||
852 | |||
853 | static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address) | ||
854 | { | ||
855 | invlpga(address, vcpu->svm->vmcb->control.asid); // is needed? | ||
856 | } | 880 | } |
857 | 881 | ||
858 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) | 882 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) |
859 | { | 883 | { |
860 | return vcpu->svm->db_regs[dr]; | 884 | return to_svm(vcpu)->db_regs[dr]; |
861 | } | 885 | } |
862 | 886 | ||
863 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 887 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, |
864 | int *exception) | 888 | int *exception) |
865 | { | 889 | { |
890 | struct vcpu_svm *svm = to_svm(vcpu); | ||
891 | |||
866 | *exception = 0; | 892 | *exception = 0; |
867 | 893 | ||
868 | if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) { | 894 | if (svm->vmcb->save.dr7 & DR7_GD_MASK) { |
869 | vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK; | 895 | svm->vmcb->save.dr7 &= ~DR7_GD_MASK; |
870 | vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK; | 896 | svm->vmcb->save.dr6 |= DR6_BD_MASK; |
871 | *exception = DB_VECTOR; | 897 | *exception = DB_VECTOR; |
872 | return; | 898 | return; |
873 | } | 899 | } |
874 | 900 | ||
875 | switch (dr) { | 901 | switch (dr) { |
876 | case 0 ... 3: | 902 | case 0 ... 3: |
877 | vcpu->svm->db_regs[dr] = value; | 903 | svm->db_regs[dr] = value; |
878 | return; | 904 | return; |
879 | case 4 ... 5: | 905 | case 4 ... 5: |
880 | if (vcpu->cr4 & CR4_DE_MASK) { | 906 | if (vcpu->cr4 & X86_CR4_DE) { |
881 | *exception = UD_VECTOR; | 907 | *exception = UD_VECTOR; |
882 | return; | 908 | return; |
883 | } | 909 | } |
@@ -886,7 +912,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
886 | *exception = GP_VECTOR; | 912 | *exception = GP_VECTOR; |
887 | return; | 913 | return; |
888 | } | 914 | } |
889 | vcpu->svm->vmcb->save.dr7 = value; | 915 | svm->vmcb->save.dr7 = value; |
890 | return; | 916 | return; |
891 | } | 917 | } |
892 | default: | 918 | default: |
@@ -897,42 +923,44 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
897 | } | 923 | } |
898 | } | 924 | } |
899 | 925 | ||
900 | static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 926 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
901 | { | 927 | { |
902 | u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info; | 928 | u32 exit_int_info = svm->vmcb->control.exit_int_info; |
929 | struct kvm *kvm = svm->vcpu.kvm; | ||
903 | u64 fault_address; | 930 | u64 fault_address; |
904 | u32 error_code; | 931 | u32 error_code; |
905 | enum emulation_result er; | 932 | enum emulation_result er; |
906 | int r; | 933 | int r; |
907 | 934 | ||
908 | if (is_external_interrupt(exit_int_info)) | 935 | if (!irqchip_in_kernel(kvm) && |
909 | push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | 936 | is_external_interrupt(exit_int_info)) |
937 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | ||
910 | 938 | ||
911 | spin_lock(&vcpu->kvm->lock); | 939 | mutex_lock(&kvm->lock); |
912 | 940 | ||
913 | fault_address = vcpu->svm->vmcb->control.exit_info_2; | 941 | fault_address = svm->vmcb->control.exit_info_2; |
914 | error_code = vcpu->svm->vmcb->control.exit_info_1; | 942 | error_code = svm->vmcb->control.exit_info_1; |
915 | r = kvm_mmu_page_fault(vcpu, fault_address, error_code); | 943 | r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
916 | if (r < 0) { | 944 | if (r < 0) { |
917 | spin_unlock(&vcpu->kvm->lock); | 945 | mutex_unlock(&kvm->lock); |
918 | return r; | 946 | return r; |
919 | } | 947 | } |
920 | if (!r) { | 948 | if (!r) { |
921 | spin_unlock(&vcpu->kvm->lock); | 949 | mutex_unlock(&kvm->lock); |
922 | return 1; | 950 | return 1; |
923 | } | 951 | } |
924 | er = emulate_instruction(vcpu, kvm_run, fault_address, error_code); | 952 | er = emulate_instruction(&svm->vcpu, kvm_run, fault_address, |
925 | spin_unlock(&vcpu->kvm->lock); | 953 | error_code); |
954 | mutex_unlock(&kvm->lock); | ||
926 | 955 | ||
927 | switch (er) { | 956 | switch (er) { |
928 | case EMULATE_DONE: | 957 | case EMULATE_DONE: |
929 | return 1; | 958 | return 1; |
930 | case EMULATE_DO_MMIO: | 959 | case EMULATE_DO_MMIO: |
931 | ++vcpu->stat.mmio_exits; | 960 | ++svm->vcpu.stat.mmio_exits; |
932 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
933 | return 0; | 961 | return 0; |
934 | case EMULATE_FAIL: | 962 | case EMULATE_FAIL: |
935 | vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); | 963 | kvm_report_emulation_failure(&svm->vcpu, "pagetable"); |
936 | break; | 964 | break; |
937 | default: | 965 | default: |
938 | BUG(); | 966 | BUG(); |
@@ -942,252 +970,142 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
942 | return 0; | 970 | return 0; |
943 | } | 971 | } |
944 | 972 | ||
945 | static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 973 | static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
946 | { | 974 | { |
947 | vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 975 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
948 | if (!(vcpu->cr0 & CR0_TS_MASK)) | 976 | if (!(svm->vcpu.cr0 & X86_CR0_TS)) |
949 | vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; | 977 | svm->vmcb->save.cr0 &= ~X86_CR0_TS; |
950 | vcpu->fpu_active = 1; | 978 | svm->vcpu.fpu_active = 1; |
951 | 979 | ||
952 | return 1; | 980 | return 1; |
953 | } | 981 | } |
954 | 982 | ||
955 | static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 983 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
956 | { | 984 | { |
957 | /* | 985 | /* |
958 | * VMCB is undefined after a SHUTDOWN intercept | 986 | * VMCB is undefined after a SHUTDOWN intercept |
959 | * so reinitialize it. | 987 | * so reinitialize it. |
960 | */ | 988 | */ |
961 | clear_page(vcpu->svm->vmcb); | 989 | clear_page(svm->vmcb); |
962 | init_vmcb(vcpu->svm->vmcb); | 990 | init_vmcb(svm->vmcb); |
963 | 991 | ||
964 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 992 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; |
965 | return 0; | 993 | return 0; |
966 | } | 994 | } |
967 | 995 | ||
968 | static int io_get_override(struct kvm_vcpu *vcpu, | 996 | static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
969 | struct vmcb_seg **seg, | ||
970 | int *addr_override) | ||
971 | { | ||
972 | u8 inst[MAX_INST_SIZE]; | ||
973 | unsigned ins_length; | ||
974 | gva_t rip; | ||
975 | int i; | ||
976 | |||
977 | rip = vcpu->svm->vmcb->save.rip; | ||
978 | ins_length = vcpu->svm->next_rip - rip; | ||
979 | rip += vcpu->svm->vmcb->save.cs.base; | ||
980 | |||
981 | if (ins_length > MAX_INST_SIZE) | ||
982 | printk(KERN_DEBUG | ||
983 | "%s: inst length err, cs base 0x%llx rip 0x%llx " | ||
984 | "next rip 0x%llx ins_length %u\n", | ||
985 | __FUNCTION__, | ||
986 | vcpu->svm->vmcb->save.cs.base, | ||
987 | vcpu->svm->vmcb->save.rip, | ||
988 | vcpu->svm->vmcb->control.exit_info_2, | ||
989 | ins_length); | ||
990 | |||
991 | if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length) | ||
992 | /* #PF */ | ||
993 | return 0; | ||
994 | |||
995 | *addr_override = 0; | ||
996 | *seg = NULL; | ||
997 | for (i = 0; i < ins_length; i++) | ||
998 | switch (inst[i]) { | ||
999 | case 0xf0: | ||
1000 | case 0xf2: | ||
1001 | case 0xf3: | ||
1002 | case 0x66: | ||
1003 | continue; | ||
1004 | case 0x67: | ||
1005 | *addr_override = 1; | ||
1006 | continue; | ||
1007 | case 0x2e: | ||
1008 | *seg = &vcpu->svm->vmcb->save.cs; | ||
1009 | continue; | ||
1010 | case 0x36: | ||
1011 | *seg = &vcpu->svm->vmcb->save.ss; | ||
1012 | continue; | ||
1013 | case 0x3e: | ||
1014 | *seg = &vcpu->svm->vmcb->save.ds; | ||
1015 | continue; | ||
1016 | case 0x26: | ||
1017 | *seg = &vcpu->svm->vmcb->save.es; | ||
1018 | continue; | ||
1019 | case 0x64: | ||
1020 | *seg = &vcpu->svm->vmcb->save.fs; | ||
1021 | continue; | ||
1022 | case 0x65: | ||
1023 | *seg = &vcpu->svm->vmcb->save.gs; | ||
1024 | continue; | ||
1025 | default: | ||
1026 | return 1; | ||
1027 | } | ||
1028 | printk(KERN_DEBUG "%s: unexpected\n", __FUNCTION__); | ||
1029 | return 0; | ||
1030 | } | ||
1031 | |||
1032 | static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) | ||
1033 | { | 997 | { |
1034 | unsigned long addr_mask; | 998 | u32 io_info = svm->vmcb->control.exit_info_1; //address size bug? |
1035 | unsigned long *reg; | 999 | int size, down, in, string, rep; |
1036 | struct vmcb_seg *seg; | 1000 | unsigned port; |
1037 | int addr_override; | ||
1038 | struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save; | ||
1039 | u16 cs_attrib = save_area->cs.attrib; | ||
1040 | unsigned addr_size = get_addr_size(vcpu); | ||
1041 | |||
1042 | if (!io_get_override(vcpu, &seg, &addr_override)) | ||
1043 | return 0; | ||
1044 | |||
1045 | if (addr_override) | ||
1046 | addr_size = (addr_size == 2) ? 4: (addr_size >> 1); | ||
1047 | 1001 | ||
1048 | if (ins) { | 1002 | ++svm->vcpu.stat.io_exits; |
1049 | reg = &vcpu->regs[VCPU_REGS_RDI]; | ||
1050 | seg = &vcpu->svm->vmcb->save.es; | ||
1051 | } else { | ||
1052 | reg = &vcpu->regs[VCPU_REGS_RSI]; | ||
1053 | seg = (seg) ? seg : &vcpu->svm->vmcb->save.ds; | ||
1054 | } | ||
1055 | 1003 | ||
1056 | addr_mask = ~0ULL >> (64 - (addr_size * 8)); | 1004 | svm->next_rip = svm->vmcb->control.exit_info_2; |
1057 | 1005 | ||
1058 | if ((cs_attrib & SVM_SELECTOR_L_MASK) && | 1006 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1059 | !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) { | ||
1060 | *address = (*reg & addr_mask); | ||
1061 | return addr_mask; | ||
1062 | } | ||
1063 | 1007 | ||
1064 | if (!(seg->attrib & SVM_SELECTOR_P_SHIFT)) { | 1008 | if (string) { |
1065 | svm_inject_gp(vcpu, 0); | 1009 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO) |
1066 | return 0; | 1010 | return 0; |
1011 | return 1; | ||
1067 | } | 1012 | } |
1068 | 1013 | ||
1069 | *address = (*reg & addr_mask) + seg->base; | ||
1070 | return addr_mask; | ||
1071 | } | ||
1072 | |||
1073 | static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
1074 | { | ||
1075 | u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? | ||
1076 | int size, down, in, string, rep; | ||
1077 | unsigned port; | ||
1078 | unsigned long count; | ||
1079 | gva_t address = 0; | ||
1080 | |||
1081 | ++vcpu->stat.io_exits; | ||
1082 | |||
1083 | vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; | ||
1084 | |||
1085 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1014 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1086 | port = io_info >> 16; | 1015 | port = io_info >> 16; |
1087 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1016 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1088 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | ||
1089 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; | 1017 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; |
1090 | count = 1; | 1018 | down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; |
1091 | down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; | ||
1092 | 1019 | ||
1093 | if (string) { | 1020 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); |
1094 | unsigned addr_mask; | ||
1095 | |||
1096 | addr_mask = io_adress(vcpu, in, &address); | ||
1097 | if (!addr_mask) { | ||
1098 | printk(KERN_DEBUG "%s: get io address failed\n", | ||
1099 | __FUNCTION__); | ||
1100 | return 1; | ||
1101 | } | ||
1102 | |||
1103 | if (rep) | ||
1104 | count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; | ||
1105 | } | ||
1106 | return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, | ||
1107 | address, rep, port); | ||
1108 | } | 1021 | } |
1109 | 1022 | ||
1110 | static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1023 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1111 | { | 1024 | { |
1112 | return 1; | 1025 | return 1; |
1113 | } | 1026 | } |
1114 | 1027 | ||
1115 | static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1028 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1116 | { | 1029 | { |
1117 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; | 1030 | svm->next_rip = svm->vmcb->save.rip + 1; |
1118 | skip_emulated_instruction(vcpu); | 1031 | skip_emulated_instruction(&svm->vcpu); |
1119 | return kvm_emulate_halt(vcpu); | 1032 | return kvm_emulate_halt(&svm->vcpu); |
1120 | } | 1033 | } |
1121 | 1034 | ||
1122 | static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1035 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1123 | { | 1036 | { |
1124 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; | 1037 | svm->next_rip = svm->vmcb->save.rip + 3; |
1125 | skip_emulated_instruction(vcpu); | 1038 | skip_emulated_instruction(&svm->vcpu); |
1126 | return kvm_hypercall(vcpu, kvm_run); | 1039 | return kvm_hypercall(&svm->vcpu, kvm_run); |
1127 | } | 1040 | } |
1128 | 1041 | ||
1129 | static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1042 | static int invalid_op_interception(struct vcpu_svm *svm, |
1043 | struct kvm_run *kvm_run) | ||
1130 | { | 1044 | { |
1131 | inject_ud(vcpu); | 1045 | inject_ud(&svm->vcpu); |
1132 | return 1; | 1046 | return 1; |
1133 | } | 1047 | } |
1134 | 1048 | ||
1135 | static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1049 | static int task_switch_interception(struct vcpu_svm *svm, |
1050 | struct kvm_run *kvm_run) | ||
1136 | { | 1051 | { |
1137 | printk(KERN_DEBUG "%s: task swiche is unsupported\n", __FUNCTION__); | 1052 | pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__); |
1138 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 1053 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; |
1139 | return 0; | 1054 | return 0; |
1140 | } | 1055 | } |
1141 | 1056 | ||
1142 | static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1057 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1143 | { | 1058 | { |
1144 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; | 1059 | svm->next_rip = svm->vmcb->save.rip + 2; |
1145 | kvm_emulate_cpuid(vcpu); | 1060 | kvm_emulate_cpuid(&svm->vcpu); |
1146 | return 1; | 1061 | return 1; |
1147 | } | 1062 | } |
1148 | 1063 | ||
1149 | static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1064 | static int emulate_on_interception(struct vcpu_svm *svm, |
1065 | struct kvm_run *kvm_run) | ||
1150 | { | 1066 | { |
1151 | if (emulate_instruction(vcpu, NULL, 0, 0) != EMULATE_DONE) | 1067 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE) |
1152 | printk(KERN_ERR "%s: failed\n", __FUNCTION__); | 1068 | pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); |
1153 | return 1; | 1069 | return 1; |
1154 | } | 1070 | } |
1155 | 1071 | ||
1156 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | 1072 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) |
1157 | { | 1073 | { |
1074 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1075 | |||
1158 | switch (ecx) { | 1076 | switch (ecx) { |
1159 | case MSR_IA32_TIME_STAMP_COUNTER: { | 1077 | case MSR_IA32_TIME_STAMP_COUNTER: { |
1160 | u64 tsc; | 1078 | u64 tsc; |
1161 | 1079 | ||
1162 | rdtscll(tsc); | 1080 | rdtscll(tsc); |
1163 | *data = vcpu->svm->vmcb->control.tsc_offset + tsc; | 1081 | *data = svm->vmcb->control.tsc_offset + tsc; |
1164 | break; | 1082 | break; |
1165 | } | 1083 | } |
1166 | case MSR_K6_STAR: | 1084 | case MSR_K6_STAR: |
1167 | *data = vcpu->svm->vmcb->save.star; | 1085 | *data = svm->vmcb->save.star; |
1168 | break; | 1086 | break; |
1169 | #ifdef CONFIG_X86_64 | 1087 | #ifdef CONFIG_X86_64 |
1170 | case MSR_LSTAR: | 1088 | case MSR_LSTAR: |
1171 | *data = vcpu->svm->vmcb->save.lstar; | 1089 | *data = svm->vmcb->save.lstar; |
1172 | break; | 1090 | break; |
1173 | case MSR_CSTAR: | 1091 | case MSR_CSTAR: |
1174 | *data = vcpu->svm->vmcb->save.cstar; | 1092 | *data = svm->vmcb->save.cstar; |
1175 | break; | 1093 | break; |
1176 | case MSR_KERNEL_GS_BASE: | 1094 | case MSR_KERNEL_GS_BASE: |
1177 | *data = vcpu->svm->vmcb->save.kernel_gs_base; | 1095 | *data = svm->vmcb->save.kernel_gs_base; |
1178 | break; | 1096 | break; |
1179 | case MSR_SYSCALL_MASK: | 1097 | case MSR_SYSCALL_MASK: |
1180 | *data = vcpu->svm->vmcb->save.sfmask; | 1098 | *data = svm->vmcb->save.sfmask; |
1181 | break; | 1099 | break; |
1182 | #endif | 1100 | #endif |
1183 | case MSR_IA32_SYSENTER_CS: | 1101 | case MSR_IA32_SYSENTER_CS: |
1184 | *data = vcpu->svm->vmcb->save.sysenter_cs; | 1102 | *data = svm->vmcb->save.sysenter_cs; |
1185 | break; | 1103 | break; |
1186 | case MSR_IA32_SYSENTER_EIP: | 1104 | case MSR_IA32_SYSENTER_EIP: |
1187 | *data = vcpu->svm->vmcb->save.sysenter_eip; | 1105 | *data = svm->vmcb->save.sysenter_eip; |
1188 | break; | 1106 | break; |
1189 | case MSR_IA32_SYSENTER_ESP: | 1107 | case MSR_IA32_SYSENTER_ESP: |
1190 | *data = vcpu->svm->vmcb->save.sysenter_esp; | 1108 | *data = svm->vmcb->save.sysenter_esp; |
1191 | break; | 1109 | break; |
1192 | default: | 1110 | default: |
1193 | return kvm_get_msr_common(vcpu, ecx, data); | 1111 | return kvm_get_msr_common(vcpu, ecx, data); |
@@ -1195,57 +1113,59 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
1195 | return 0; | 1113 | return 0; |
1196 | } | 1114 | } |
1197 | 1115 | ||
1198 | static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1116 | static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1199 | { | 1117 | { |
1200 | u32 ecx = vcpu->regs[VCPU_REGS_RCX]; | 1118 | u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX]; |
1201 | u64 data; | 1119 | u64 data; |
1202 | 1120 | ||
1203 | if (svm_get_msr(vcpu, ecx, &data)) | 1121 | if (svm_get_msr(&svm->vcpu, ecx, &data)) |
1204 | svm_inject_gp(vcpu, 0); | 1122 | svm_inject_gp(&svm->vcpu, 0); |
1205 | else { | 1123 | else { |
1206 | vcpu->svm->vmcb->save.rax = data & 0xffffffff; | 1124 | svm->vmcb->save.rax = data & 0xffffffff; |
1207 | vcpu->regs[VCPU_REGS_RDX] = data >> 32; | 1125 | svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32; |
1208 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; | 1126 | svm->next_rip = svm->vmcb->save.rip + 2; |
1209 | skip_emulated_instruction(vcpu); | 1127 | skip_emulated_instruction(&svm->vcpu); |
1210 | } | 1128 | } |
1211 | return 1; | 1129 | return 1; |
1212 | } | 1130 | } |
1213 | 1131 | ||
1214 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 1132 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
1215 | { | 1133 | { |
1134 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1135 | |||
1216 | switch (ecx) { | 1136 | switch (ecx) { |
1217 | case MSR_IA32_TIME_STAMP_COUNTER: { | 1137 | case MSR_IA32_TIME_STAMP_COUNTER: { |
1218 | u64 tsc; | 1138 | u64 tsc; |
1219 | 1139 | ||
1220 | rdtscll(tsc); | 1140 | rdtscll(tsc); |
1221 | vcpu->svm->vmcb->control.tsc_offset = data - tsc; | 1141 | svm->vmcb->control.tsc_offset = data - tsc; |
1222 | break; | 1142 | break; |
1223 | } | 1143 | } |
1224 | case MSR_K6_STAR: | 1144 | case MSR_K6_STAR: |
1225 | vcpu->svm->vmcb->save.star = data; | 1145 | svm->vmcb->save.star = data; |
1226 | break; | 1146 | break; |
1227 | #ifdef CONFIG_X86_64 | 1147 | #ifdef CONFIG_X86_64 |
1228 | case MSR_LSTAR: | 1148 | case MSR_LSTAR: |
1229 | vcpu->svm->vmcb->save.lstar = data; | 1149 | svm->vmcb->save.lstar = data; |
1230 | break; | 1150 | break; |
1231 | case MSR_CSTAR: | 1151 | case MSR_CSTAR: |
1232 | vcpu->svm->vmcb->save.cstar = data; | 1152 | svm->vmcb->save.cstar = data; |
1233 | break; | 1153 | break; |
1234 | case MSR_KERNEL_GS_BASE: | 1154 | case MSR_KERNEL_GS_BASE: |
1235 | vcpu->svm->vmcb->save.kernel_gs_base = data; | 1155 | svm->vmcb->save.kernel_gs_base = data; |
1236 | break; | 1156 | break; |
1237 | case MSR_SYSCALL_MASK: | 1157 | case MSR_SYSCALL_MASK: |
1238 | vcpu->svm->vmcb->save.sfmask = data; | 1158 | svm->vmcb->save.sfmask = data; |
1239 | break; | 1159 | break; |
1240 | #endif | 1160 | #endif |
1241 | case MSR_IA32_SYSENTER_CS: | 1161 | case MSR_IA32_SYSENTER_CS: |
1242 | vcpu->svm->vmcb->save.sysenter_cs = data; | 1162 | svm->vmcb->save.sysenter_cs = data; |
1243 | break; | 1163 | break; |
1244 | case MSR_IA32_SYSENTER_EIP: | 1164 | case MSR_IA32_SYSENTER_EIP: |
1245 | vcpu->svm->vmcb->save.sysenter_eip = data; | 1165 | svm->vmcb->save.sysenter_eip = data; |
1246 | break; | 1166 | break; |
1247 | case MSR_IA32_SYSENTER_ESP: | 1167 | case MSR_IA32_SYSENTER_ESP: |
1248 | vcpu->svm->vmcb->save.sysenter_esp = data; | 1168 | svm->vmcb->save.sysenter_esp = data; |
1249 | break; | 1169 | break; |
1250 | default: | 1170 | default: |
1251 | return kvm_set_msr_common(vcpu, ecx, data); | 1171 | return kvm_set_msr_common(vcpu, ecx, data); |
@@ -1253,37 +1173,39 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
1253 | return 0; | 1173 | return 0; |
1254 | } | 1174 | } |
1255 | 1175 | ||
1256 | static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1176 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1257 | { | 1177 | { |
1258 | u32 ecx = vcpu->regs[VCPU_REGS_RCX]; | 1178 | u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX]; |
1259 | u64 data = (vcpu->svm->vmcb->save.rax & -1u) | 1179 | u64 data = (svm->vmcb->save.rax & -1u) |
1260 | | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); | 1180 | | ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32); |
1261 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; | 1181 | svm->next_rip = svm->vmcb->save.rip + 2; |
1262 | if (svm_set_msr(vcpu, ecx, data)) | 1182 | if (svm_set_msr(&svm->vcpu, ecx, data)) |
1263 | svm_inject_gp(vcpu, 0); | 1183 | svm_inject_gp(&svm->vcpu, 0); |
1264 | else | 1184 | else |
1265 | skip_emulated_instruction(vcpu); | 1185 | skip_emulated_instruction(&svm->vcpu); |
1266 | return 1; | 1186 | return 1; |
1267 | } | 1187 | } |
1268 | 1188 | ||
1269 | static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1189 | static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1270 | { | 1190 | { |
1271 | if (vcpu->svm->vmcb->control.exit_info_1) | 1191 | if (svm->vmcb->control.exit_info_1) |
1272 | return wrmsr_interception(vcpu, kvm_run); | 1192 | return wrmsr_interception(svm, kvm_run); |
1273 | else | 1193 | else |
1274 | return rdmsr_interception(vcpu, kvm_run); | 1194 | return rdmsr_interception(svm, kvm_run); |
1275 | } | 1195 | } |
1276 | 1196 | ||
1277 | static int interrupt_window_interception(struct kvm_vcpu *vcpu, | 1197 | static int interrupt_window_interception(struct vcpu_svm *svm, |
1278 | struct kvm_run *kvm_run) | 1198 | struct kvm_run *kvm_run) |
1279 | { | 1199 | { |
1200 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); | ||
1201 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | ||
1280 | /* | 1202 | /* |
1281 | * If the user space waits to inject interrupts, exit as soon as | 1203 | * If the user space waits to inject interrupts, exit as soon as |
1282 | * possible | 1204 | * possible |
1283 | */ | 1205 | */ |
1284 | if (kvm_run->request_interrupt_window && | 1206 | if (kvm_run->request_interrupt_window && |
1285 | !vcpu->irq_summary) { | 1207 | !svm->vcpu.irq_summary) { |
1286 | ++vcpu->stat.irq_window_exits; | 1208 | ++svm->vcpu.stat.irq_window_exits; |
1287 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 1209 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
1288 | return 0; | 1210 | return 0; |
1289 | } | 1211 | } |
@@ -1291,7 +1213,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, | |||
1291 | return 1; | 1213 | return 1; |
1292 | } | 1214 | } |
1293 | 1215 | ||
1294 | static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, | 1216 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm, |
1295 | struct kvm_run *kvm_run) = { | 1217 | struct kvm_run *kvm_run) = { |
1296 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 1218 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
1297 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 1219 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
@@ -1338,15 +1260,25 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
1338 | }; | 1260 | }; |
1339 | 1261 | ||
1340 | 1262 | ||
1341 | static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1263 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
1342 | { | 1264 | { |
1343 | u32 exit_code = vcpu->svm->vmcb->control.exit_code; | 1265 | struct vcpu_svm *svm = to_svm(vcpu); |
1266 | u32 exit_code = svm->vmcb->control.exit_code; | ||
1267 | |||
1268 | kvm_reput_irq(svm); | ||
1344 | 1269 | ||
1345 | if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && | 1270 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
1271 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
1272 | kvm_run->fail_entry.hardware_entry_failure_reason | ||
1273 | = svm->vmcb->control.exit_code; | ||
1274 | return 0; | ||
1275 | } | ||
1276 | |||
1277 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && | ||
1346 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) | 1278 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) |
1349 | __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info, | 1281 | __FUNCTION__, svm->vmcb->control.exit_int_info,
1348 | "exit_code 0x%x\n", | 1280 | "exit_code 0x%x\n", |
1349 | __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info, | 1281 | __FUNCTION__, svm->vmcb->control.exit_int_info, |
1350 | exit_code); | 1282 | exit_code); |
1351 | 1283 | ||
1352 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) | 1284 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) |
@@ -1356,7 +1288,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1356 | return 0; | 1288 | return 0; |
1357 | } | 1289 | } |
1358 | 1290 | ||
1359 | return svm_exit_handlers[exit_code](vcpu, kvm_run); | 1291 | return svm_exit_handlers[exit_code](svm, kvm_run); |
1360 | } | 1292 | } |
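handle_exit above dispatches through svm_exit_handlers[], a function-pointer table indexed by the hardware exit code and guarded by an ARRAY_SIZE bound plus a NULL check. A freestanding sketch of the same table-driven dispatch (types, names and indices are illustrative):

    #include <stddef.h>

    struct ctx;                             /* stand-in for vcpu_svm/kvm_run */
    typedef int (*exit_handler_t)(struct ctx *c);

    static int handle_intr(struct ctx *c)  { return 1; }
    static int handle_cpuid(struct ctx *c) { return 1; }

    static exit_handler_t handlers[] = {
            [0x60] = handle_intr,           /* sparse table, designated inits */
            [0x72] = handle_cpuid,
    };

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static int dispatch(struct ctx *c, unsigned exit_code)
    {
            if (exit_code >= ARRAY_SIZE(handlers) || !handlers[exit_code])
                    return -1;              /* unknown exit: report an error */
            return handlers[exit_code](c);
    }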
1361 | 1293 | ||
1362 | static void reload_tss(struct kvm_vcpu *vcpu) | 1294 | static void reload_tss(struct kvm_vcpu *vcpu) |
@@ -1368,93 +1300,126 @@ static void reload_tss(struct kvm_vcpu *vcpu) | |||
1368 | load_TR_desc(); | 1300 | load_TR_desc(); |
1369 | } | 1301 | } |
1370 | 1302 | ||
1371 | static void pre_svm_run(struct kvm_vcpu *vcpu) | 1303 | static void pre_svm_run(struct vcpu_svm *svm) |
1372 | { | 1304 | { |
1373 | int cpu = raw_smp_processor_id(); | 1305 | int cpu = raw_smp_processor_id(); |
1374 | 1306 | ||
1375 | struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); | 1307 | struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); |
1376 | 1308 | ||
1377 | vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | 1309 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; |
1378 | if (vcpu->cpu != cpu || | 1310 | if (svm->vcpu.cpu != cpu || |
1379 | vcpu->svm->asid_generation != svm_data->asid_generation) | 1311 | svm->asid_generation != svm_data->asid_generation) |
1380 | new_asid(vcpu, svm_data); | 1312 | new_asid(svm, svm_data); |
1381 | } | 1313 | } |
1382 | 1314 | ||
1383 | 1315 | ||
1384 | static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 1316 | static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) |
1385 | { | 1317 | { |
1386 | struct vmcb_control_area *control; | 1318 | struct vmcb_control_area *control; |
1387 | 1319 | ||
1388 | control = &vcpu->svm->vmcb->control; | 1320 | control = &svm->vmcb->control; |
1389 | control->int_vector = pop_irq(vcpu); | 1321 | control->int_vector = irq; |
1390 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 1322 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
1391 | control->int_ctl |= V_IRQ_MASK | | 1323 | control->int_ctl |= V_IRQ_MASK | |
1392 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); | 1324 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); |
1393 | } | 1325 | } |
1394 | 1326 | ||
1395 | static void kvm_reput_irq(struct kvm_vcpu *vcpu) | 1327 | static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) |
1328 | { | ||
1329 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1330 | |||
1331 | svm_inject_irq(svm, irq); | ||
1332 | } | ||
1333 | |||
1334 | static void svm_intr_assist(struct kvm_vcpu *vcpu) | ||
1335 | { | ||
1336 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1337 | struct vmcb *vmcb = svm->vmcb; | ||
1338 | int intr_vector = -1; | ||
1339 | |||
1340 | kvm_inject_pending_timer_irqs(vcpu); | ||
1341 | if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && | ||
1342 | ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { | ||
1343 | intr_vector = vmcb->control.exit_int_info & | ||
1344 | SVM_EVTINJ_VEC_MASK; | ||
1345 | vmcb->control.exit_int_info = 0; | ||
1346 | svm_inject_irq(svm, intr_vector); | ||
1347 | return; | ||
1348 | } | ||
1349 | |||
1350 | if (vmcb->control.int_ctl & V_IRQ_MASK) | ||
1351 | return; | ||
1352 | |||
1353 | if (!kvm_cpu_has_interrupt(vcpu)) | ||
1354 | return; | ||
1355 | |||
1356 | if (!(vmcb->save.rflags & X86_EFLAGS_IF) || | ||
1357 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || | ||
1358 | (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { | ||
1359 | /* unable to deliver irq, set pending irq */ | ||
1360 | vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); | ||
1361 | svm_inject_irq(svm, 0x0); | ||
1362 | return; | ||
1363 | } | ||
1364 | /* Okay, we can deliver the interrupt: grab it and update PIC state. */ | ||
1365 | intr_vector = kvm_cpu_get_interrupt(vcpu); | ||
1366 | svm_inject_irq(svm, intr_vector); | ||
1367 | kvm_timer_intr_post(vcpu, intr_vector); | ||
1368 | } | ||
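svm_intr_assist only injects when the guest can actually accept the interrupt: RFLAGS.IF set, no interrupt shadow from sti/mov ss, and no event already queued in event_inj; otherwise it arms the VINTR intercept and retries once the window opens. A compact sketch of that deliverability test (the constants are illustrative stand-ins; IF really is bit 9 of RFLAGS):

    #include <stdbool.h>
    #include <stdint.h>

    #define RFLAGS_IF       (1u << 9)       /* X86_EFLAGS_IF               */
    #define INTR_SHADOW     (1u << 0)       /* interrupt shadow active     */
    #define EVTINJ_VALID    (1u << 31)      /* an event is already queued  */

    static bool can_inject_now(uint32_t rflags, uint32_t int_state,
                               uint32_t event_inj)
    {
            return (rflags & RFLAGS_IF) &&
                   !(int_state & INTR_SHADOW) &&
                   !(event_inj & EVTINJ_VALID);
    }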
1369 | |||
1370 | static void kvm_reput_irq(struct vcpu_svm *svm) | ||
1396 | { | 1371 | { |
1397 | struct vmcb_control_area *control = &vcpu->svm->vmcb->control; | 1372 | struct vmcb_control_area *control = &svm->vmcb->control; |
1398 | 1373 | ||
1399 | if (control->int_ctl & V_IRQ_MASK) { | 1374 | if ((control->int_ctl & V_IRQ_MASK) |
1375 | && !irqchip_in_kernel(svm->vcpu.kvm)) { | ||
1400 | control->int_ctl &= ~V_IRQ_MASK; | 1376 | control->int_ctl &= ~V_IRQ_MASK; |
1401 | push_irq(vcpu, control->int_vector); | 1377 | push_irq(&svm->vcpu, control->int_vector); |
1402 | } | 1378 | } |
1403 | 1379 | ||
1404 | vcpu->interrupt_window_open = | 1380 | svm->vcpu.interrupt_window_open = |
1405 | !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); | 1381 | !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); |
1406 | } | 1382 | } |
1407 | 1383 | ||
1384 | static void svm_do_inject_vector(struct vcpu_svm *svm) | ||
1385 | { | ||
1386 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
1387 | int word_index = __ffs(vcpu->irq_summary); | ||
1388 | int bit_index = __ffs(vcpu->irq_pending[word_index]); | ||
1389 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
1390 | |||
1391 | clear_bit(bit_index, &vcpu->irq_pending[word_index]); | ||
1392 | if (!vcpu->irq_pending[word_index]) | ||
1393 | clear_bit(word_index, &vcpu->irq_summary); | ||
1394 | svm_inject_irq(svm, irq); | ||
1395 | } | ||
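svm_do_inject_vector pulls the next vector out of a two-level pending bitmap: irq_summary holds one bit per irq_pending word, __ffs finds the lowest set bit at each level, and the vector is word_index * BITS_PER_LONG + bit_index. A user-space sketch of the same lookup, with the compiler builtin standing in for the kernel's __ffs:

    #include <stdint.h>

    #define BITS_PER_LONG   (8 * sizeof(unsigned long))
    #define NR_IRQ_WORDS    4

    /* Return the lowest pending vector and clear it, or -1 if none pending. */
    static int pop_lowest_irq(unsigned long *summary,
                              unsigned long pending[NR_IRQ_WORDS])
    {
            int word, bit;

            if (!*summary)
                    return -1;
            word = __builtin_ctzl(*summary);        /* lowest non-empty word */
            bit  = __builtin_ctzl(pending[word]);   /* lowest pending bit    */
            pending[word] &= ~(1UL << bit);
            if (!pending[word])
                    *summary &= ~(1UL << word);
            return word * (int)BITS_PER_LONG + bit;
    }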
1396 | |||
1408 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 1397 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, |
1409 | struct kvm_run *kvm_run) | 1398 | struct kvm_run *kvm_run) |
1410 | { | 1399 | { |
1411 | struct vmcb_control_area *control = &vcpu->svm->vmcb->control; | 1400 | struct vcpu_svm *svm = to_svm(vcpu); |
1401 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
1412 | 1402 | ||
1413 | vcpu->interrupt_window_open = | 1403 | svm->vcpu.interrupt_window_open = |
1414 | (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && | 1404 | (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && |
1415 | (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); | 1405 | (svm->vmcb->save.rflags & X86_EFLAGS_IF)); |
1416 | 1406 | ||
1417 | if (vcpu->interrupt_window_open && vcpu->irq_summary) | 1407 | if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary) |
1418 | /* | 1408 | /* |
1419 | * If interrupts enabled, and not blocked by sti or mov ss. Good. | 1409 | * If interrupts enabled, and not blocked by sti or mov ss. Good. |
1420 | */ | 1410 | */ |
1421 | kvm_do_inject_irq(vcpu); | 1411 | svm_do_inject_vector(svm); |
1422 | 1412 | ||
1423 | /* | 1413 | /* |
1424 | * Interrupts blocked. Wait for unblock. | 1414 | * Interrupts blocked. Wait for unblock. |
1425 | */ | 1415 | */ |
1426 | if (!vcpu->interrupt_window_open && | 1416 | if (!svm->vcpu.interrupt_window_open && |
1427 | (vcpu->irq_summary || kvm_run->request_interrupt_window)) { | 1417 | (svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) { |
1428 | control->intercept |= 1ULL << INTERCEPT_VINTR; | 1418 | control->intercept |= 1ULL << INTERCEPT_VINTR; |
1429 | } else | 1419 | } else |
1430 | control->intercept &= ~(1ULL << INTERCEPT_VINTR); | 1420 | control->intercept &= ~(1ULL << INTERCEPT_VINTR); |
1431 | } | 1421 | } |
1432 | 1422 | ||
1433 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | ||
1434 | struct kvm_run *kvm_run) | ||
1435 | { | ||
1436 | kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && | ||
1437 | vcpu->irq_summary == 0); | ||
1438 | kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; | ||
1439 | kvm_run->cr8 = vcpu->cr8; | ||
1440 | kvm_run->apic_base = vcpu->apic_base; | ||
1441 | } | ||
1442 | |||
1443 | /* | ||
1444 | * Check if userspace requested an interrupt window, and that the | ||
1445 | * interrupt window is open. | ||
1446 | * | ||
1447 | * No need to exit to userspace if we already have an interrupt queued. | ||
1448 | */ | ||
1449 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | ||
1450 | struct kvm_run *kvm_run) | ||
1451 | { | ||
1452 | return (!vcpu->irq_summary && | ||
1453 | kvm_run->request_interrupt_window && | ||
1454 | vcpu->interrupt_window_open && | ||
1455 | (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); | ||
1456 | } | ||
1457 | |||
1458 | static void save_db_regs(unsigned long *db_regs) | 1423 | static void save_db_regs(unsigned long *db_regs) |
1459 | { | 1424 | { |
1460 | asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); | 1425 | asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); |
@@ -1476,49 +1441,37 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu) | |||
1476 | force_new_asid(vcpu); | 1441 | force_new_asid(vcpu); |
1477 | } | 1442 | } |
1478 | 1443 | ||
1479 | static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1444 | static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) |
1445 | { | ||
1446 | } | ||
1447 | |||
1448 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
1480 | { | 1449 | { |
1450 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1481 | u16 fs_selector; | 1451 | u16 fs_selector; |
1482 | u16 gs_selector; | 1452 | u16 gs_selector; |
1483 | u16 ldt_selector; | 1453 | u16 ldt_selector; |
1484 | int r; | ||
1485 | |||
1486 | again: | ||
1487 | r = kvm_mmu_reload(vcpu); | ||
1488 | if (unlikely(r)) | ||
1489 | return r; | ||
1490 | |||
1491 | if (!vcpu->mmio_read_completed) | ||
1492 | do_interrupt_requests(vcpu, kvm_run); | ||
1493 | 1454 | ||
1494 | clgi(); | 1455 | pre_svm_run(svm); |
1495 | |||
1496 | vcpu->guest_mode = 1; | ||
1497 | if (vcpu->requests) | ||
1498 | if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) | ||
1499 | svm_flush_tlb(vcpu); | ||
1500 | |||
1501 | pre_svm_run(vcpu); | ||
1502 | 1456 | ||
1503 | save_host_msrs(vcpu); | 1457 | save_host_msrs(vcpu); |
1504 | fs_selector = read_fs(); | 1458 | fs_selector = read_fs(); |
1505 | gs_selector = read_gs(); | 1459 | gs_selector = read_gs(); |
1506 | ldt_selector = read_ldt(); | 1460 | ldt_selector = read_ldt(); |
1507 | vcpu->svm->host_cr2 = kvm_read_cr2(); | 1461 | svm->host_cr2 = kvm_read_cr2(); |
1508 | vcpu->svm->host_dr6 = read_dr6(); | 1462 | svm->host_dr6 = read_dr6(); |
1509 | vcpu->svm->host_dr7 = read_dr7(); | 1463 | svm->host_dr7 = read_dr7(); |
1510 | vcpu->svm->vmcb->save.cr2 = vcpu->cr2; | 1464 | svm->vmcb->save.cr2 = vcpu->cr2; |
1511 | 1465 | ||
1512 | if (vcpu->svm->vmcb->save.dr7 & 0xff) { | 1466 | if (svm->vmcb->save.dr7 & 0xff) { |
1513 | write_dr7(0); | 1467 | write_dr7(0); |
1514 | save_db_regs(vcpu->svm->host_db_regs); | 1468 | save_db_regs(svm->host_db_regs); |
1515 | load_db_regs(vcpu->svm->db_regs); | 1469 | load_db_regs(svm->db_regs); |
1516 | } | 1470 | } |
1517 | 1471 | ||
1518 | if (vcpu->fpu_active) { | 1472 | clgi(); |
1519 | fx_save(vcpu->host_fx_image); | 1473 | |
1520 | fx_restore(vcpu->guest_fx_image); | 1474 | local_irq_enable(); |
1521 | } | ||
1522 | 1475 | ||
1523 | asm volatile ( | 1476 | asm volatile ( |
1524 | #ifdef CONFIG_X86_64 | 1477 | #ifdef CONFIG_X86_64 |
@@ -1532,34 +1485,33 @@ again: | |||
1532 | #endif | 1485 | #endif |
1533 | 1486 | ||
1534 | #ifdef CONFIG_X86_64 | 1487 | #ifdef CONFIG_X86_64 |
1535 | "mov %c[rbx](%[vcpu]), %%rbx \n\t" | 1488 | "mov %c[rbx](%[svm]), %%rbx \n\t" |
1536 | "mov %c[rcx](%[vcpu]), %%rcx \n\t" | 1489 | "mov %c[rcx](%[svm]), %%rcx \n\t" |
1537 | "mov %c[rdx](%[vcpu]), %%rdx \n\t" | 1490 | "mov %c[rdx](%[svm]), %%rdx \n\t" |
1538 | "mov %c[rsi](%[vcpu]), %%rsi \n\t" | 1491 | "mov %c[rsi](%[svm]), %%rsi \n\t" |
1539 | "mov %c[rdi](%[vcpu]), %%rdi \n\t" | 1492 | "mov %c[rdi](%[svm]), %%rdi \n\t" |
1540 | "mov %c[rbp](%[vcpu]), %%rbp \n\t" | 1493 | "mov %c[rbp](%[svm]), %%rbp \n\t" |
1541 | "mov %c[r8](%[vcpu]), %%r8 \n\t" | 1494 | "mov %c[r8](%[svm]), %%r8 \n\t" |
1542 | "mov %c[r9](%[vcpu]), %%r9 \n\t" | 1495 | "mov %c[r9](%[svm]), %%r9 \n\t" |
1543 | "mov %c[r10](%[vcpu]), %%r10 \n\t" | 1496 | "mov %c[r10](%[svm]), %%r10 \n\t" |
1544 | "mov %c[r11](%[vcpu]), %%r11 \n\t" | 1497 | "mov %c[r11](%[svm]), %%r11 \n\t" |
1545 | "mov %c[r12](%[vcpu]), %%r12 \n\t" | 1498 | "mov %c[r12](%[svm]), %%r12 \n\t" |
1546 | "mov %c[r13](%[vcpu]), %%r13 \n\t" | 1499 | "mov %c[r13](%[svm]), %%r13 \n\t" |
1547 | "mov %c[r14](%[vcpu]), %%r14 \n\t" | 1500 | "mov %c[r14](%[svm]), %%r14 \n\t" |
1548 | "mov %c[r15](%[vcpu]), %%r15 \n\t" | 1501 | "mov %c[r15](%[svm]), %%r15 \n\t" |
1549 | #else | 1502 | #else |
1550 | "mov %c[rbx](%[vcpu]), %%ebx \n\t" | 1503 | "mov %c[rbx](%[svm]), %%ebx \n\t" |
1551 | "mov %c[rcx](%[vcpu]), %%ecx \n\t" | 1504 | "mov %c[rcx](%[svm]), %%ecx \n\t" |
1552 | "mov %c[rdx](%[vcpu]), %%edx \n\t" | 1505 | "mov %c[rdx](%[svm]), %%edx \n\t" |
1553 | "mov %c[rsi](%[vcpu]), %%esi \n\t" | 1506 | "mov %c[rsi](%[svm]), %%esi \n\t" |
1554 | "mov %c[rdi](%[vcpu]), %%edi \n\t" | 1507 | "mov %c[rdi](%[svm]), %%edi \n\t" |
1555 | "mov %c[rbp](%[vcpu]), %%ebp \n\t" | 1508 | "mov %c[rbp](%[svm]), %%ebp \n\t" |
1556 | #endif | 1509 | #endif |
1557 | 1510 | ||
1558 | #ifdef CONFIG_X86_64 | 1511 | #ifdef CONFIG_X86_64 |
1559 | /* Enter guest mode */ | 1512 | /* Enter guest mode */ |
1560 | "push %%rax \n\t" | 1513 | "push %%rax \n\t" |
1561 | "mov %c[svm](%[vcpu]), %%rax \n\t" | 1514 | "mov %c[vmcb](%[svm]), %%rax \n\t" |
1562 | "mov %c[vmcb](%%rax), %%rax \n\t" | ||
1563 | SVM_VMLOAD "\n\t" | 1515 | SVM_VMLOAD "\n\t" |
1564 | SVM_VMRUN "\n\t" | 1516 | SVM_VMRUN "\n\t" |
1565 | SVM_VMSAVE "\n\t" | 1517 | SVM_VMSAVE "\n\t" |
@@ -1567,8 +1519,7 @@ again: | |||
1567 | #else | 1519 | #else |
1568 | /* Enter guest mode */ | 1520 | /* Enter guest mode */ |
1569 | "push %%eax \n\t" | 1521 | "push %%eax \n\t" |
1570 | "mov %c[svm](%[vcpu]), %%eax \n\t" | 1522 | "mov %c[vmcb](%[svm]), %%eax \n\t" |
1571 | "mov %c[vmcb](%%eax), %%eax \n\t" | ||
1572 | SVM_VMLOAD "\n\t" | 1523 | SVM_VMLOAD "\n\t" |
1573 | SVM_VMRUN "\n\t" | 1524 | SVM_VMRUN "\n\t" |
1574 | SVM_VMSAVE "\n\t" | 1525 | SVM_VMSAVE "\n\t" |
@@ -1577,73 +1528,69 @@ again: | |||
1577 | 1528 | ||
1578 | /* Save guest registers, load host registers */ | 1529 | /* Save guest registers, load host registers */ |
1579 | #ifdef CONFIG_X86_64 | 1530 | #ifdef CONFIG_X86_64 |
1580 | "mov %%rbx, %c[rbx](%[vcpu]) \n\t" | 1531 | "mov %%rbx, %c[rbx](%[svm]) \n\t" |
1581 | "mov %%rcx, %c[rcx](%[vcpu]) \n\t" | 1532 | "mov %%rcx, %c[rcx](%[svm]) \n\t" |
1582 | "mov %%rdx, %c[rdx](%[vcpu]) \n\t" | 1533 | "mov %%rdx, %c[rdx](%[svm]) \n\t" |
1583 | "mov %%rsi, %c[rsi](%[vcpu]) \n\t" | 1534 | "mov %%rsi, %c[rsi](%[svm]) \n\t" |
1584 | "mov %%rdi, %c[rdi](%[vcpu]) \n\t" | 1535 | "mov %%rdi, %c[rdi](%[svm]) \n\t" |
1585 | "mov %%rbp, %c[rbp](%[vcpu]) \n\t" | 1536 | "mov %%rbp, %c[rbp](%[svm]) \n\t" |
1586 | "mov %%r8, %c[r8](%[vcpu]) \n\t" | 1537 | "mov %%r8, %c[r8](%[svm]) \n\t" |
1587 | "mov %%r9, %c[r9](%[vcpu]) \n\t" | 1538 | "mov %%r9, %c[r9](%[svm]) \n\t" |
1588 | "mov %%r10, %c[r10](%[vcpu]) \n\t" | 1539 | "mov %%r10, %c[r10](%[svm]) \n\t" |
1589 | "mov %%r11, %c[r11](%[vcpu]) \n\t" | 1540 | "mov %%r11, %c[r11](%[svm]) \n\t" |
1590 | "mov %%r12, %c[r12](%[vcpu]) \n\t" | 1541 | "mov %%r12, %c[r12](%[svm]) \n\t" |
1591 | "mov %%r13, %c[r13](%[vcpu]) \n\t" | 1542 | "mov %%r13, %c[r13](%[svm]) \n\t" |
1592 | "mov %%r14, %c[r14](%[vcpu]) \n\t" | 1543 | "mov %%r14, %c[r14](%[svm]) \n\t" |
1593 | "mov %%r15, %c[r15](%[vcpu]) \n\t" | 1544 | "mov %%r15, %c[r15](%[svm]) \n\t" |
1594 | 1545 | ||
1595 | "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" | 1546 | "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" |
1596 | "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" | 1547 | "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" |
1597 | "pop %%rbp; pop %%rdi; pop %%rsi;" | 1548 | "pop %%rbp; pop %%rdi; pop %%rsi;" |
1598 | "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" | 1549 | "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" |
1599 | #else | 1550 | #else |
1600 | "mov %%ebx, %c[rbx](%[vcpu]) \n\t" | 1551 | "mov %%ebx, %c[rbx](%[svm]) \n\t" |
1601 | "mov %%ecx, %c[rcx](%[vcpu]) \n\t" | 1552 | "mov %%ecx, %c[rcx](%[svm]) \n\t" |
1602 | "mov %%edx, %c[rdx](%[vcpu]) \n\t" | 1553 | "mov %%edx, %c[rdx](%[svm]) \n\t" |
1603 | "mov %%esi, %c[rsi](%[vcpu]) \n\t" | 1554 | "mov %%esi, %c[rsi](%[svm]) \n\t" |
1604 | "mov %%edi, %c[rdi](%[vcpu]) \n\t" | 1555 | "mov %%edi, %c[rdi](%[svm]) \n\t" |
1605 | "mov %%ebp, %c[rbp](%[vcpu]) \n\t" | 1556 | "mov %%ebp, %c[rbp](%[svm]) \n\t" |
1606 | 1557 | ||
1607 | "pop %%ebp; pop %%edi; pop %%esi;" | 1558 | "pop %%ebp; pop %%edi; pop %%esi;" |
1608 | "pop %%edx; pop %%ecx; pop %%ebx; \n\t" | 1559 | "pop %%edx; pop %%ecx; pop %%ebx; \n\t" |
1609 | #endif | 1560 | #endif |
1610 | : | 1561 | : |
1611 | : [vcpu]"a"(vcpu), | 1562 | : [svm]"a"(svm), |
1612 | [svm]"i"(offsetof(struct kvm_vcpu, svm)), | ||
1613 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), | 1563 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), |
1614 | [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), | 1564 | [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])), |
1615 | [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), | 1565 | [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])), |
1616 | [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), | 1566 | [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])), |
1617 | [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), | 1567 | [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])), |
1618 | [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), | 1568 | [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])), |
1619 | [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) | 1569 | [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP])) |
1620 | #ifdef CONFIG_X86_64 | 1570 | #ifdef CONFIG_X86_64 |
1621 | ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), | 1571 | ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])), |
1622 | [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), | 1572 | [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])), |
1623 | [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), | 1573 | [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])), |
1624 | [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), | 1574 | [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])), |
1625 | [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), | 1575 | [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])), |
1626 | [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), | 1576 | [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])), |
1627 | [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), | 1577 | [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])), |
1628 | [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) | 1578 | [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15])) |
1629 | #endif | 1579 | #endif |
1630 | : "cc", "memory" ); | 1580 | : "cc", "memory" ); |
1631 | 1581 | ||
1632 | vcpu->guest_mode = 0; | 1582 | local_irq_disable(); |
1633 | 1583 | ||
1634 | if (vcpu->fpu_active) { | 1584 | stgi(); |
1635 | fx_save(vcpu->guest_fx_image); | ||
1636 | fx_restore(vcpu->host_fx_image); | ||
1637 | } | ||
1638 | 1585 | ||
1639 | if ((vcpu->svm->vmcb->save.dr7 & 0xff)) | 1586 | if ((svm->vmcb->save.dr7 & 0xff)) |
1640 | load_db_regs(vcpu->svm->host_db_regs); | 1587 | load_db_regs(svm->host_db_regs); |
1641 | 1588 | ||
1642 | vcpu->cr2 = vcpu->svm->vmcb->save.cr2; | 1589 | vcpu->cr2 = svm->vmcb->save.cr2; |
1643 | 1590 | ||
1644 | write_dr6(vcpu->svm->host_dr6); | 1591 | write_dr6(svm->host_dr6); |
1645 | write_dr7(vcpu->svm->host_dr7); | 1592 | write_dr7(svm->host_dr7); |
1646 | kvm_write_cr2(vcpu->svm->host_cr2); | 1593 | kvm_write_cr2(svm->host_cr2); |
1647 | 1594 | ||
1648 | load_fs(fs_selector); | 1595 | load_fs(fs_selector); |
1649 | load_gs(gs_selector); | 1596 | load_gs(gs_selector); |
@@ -1652,57 +1599,19 @@ again: | |||
1652 | 1599 | ||
1653 | reload_tss(vcpu); | 1600 | reload_tss(vcpu); |
1654 | 1601 | ||
1655 | /* | 1602 | svm->next_rip = 0; |
1656 | * Profile KVM exit RIPs: | ||
1657 | */ | ||
1658 | if (unlikely(prof_on == KVM_PROFILING)) | ||
1659 | profile_hit(KVM_PROFILING, | ||
1660 | (void *)(unsigned long)vcpu->svm->vmcb->save.rip); | ||
1661 | |||
1662 | stgi(); | ||
1663 | |||
1664 | kvm_reput_irq(vcpu); | ||
1665 | |||
1666 | vcpu->svm->next_rip = 0; | ||
1667 | |||
1668 | if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | ||
1669 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
1670 | kvm_run->fail_entry.hardware_entry_failure_reason | ||
1671 | = vcpu->svm->vmcb->control.exit_code; | ||
1672 | post_kvm_run_save(vcpu, kvm_run); | ||
1673 | return 0; | ||
1674 | } | ||
1675 | |||
1676 | r = handle_exit(vcpu, kvm_run); | ||
1677 | if (r > 0) { | ||
1678 | if (signal_pending(current)) { | ||
1679 | ++vcpu->stat.signal_exits; | ||
1680 | post_kvm_run_save(vcpu, kvm_run); | ||
1681 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
1682 | return -EINTR; | ||
1683 | } | ||
1684 | |||
1685 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | ||
1686 | ++vcpu->stat.request_irq_exits; | ||
1687 | post_kvm_run_save(vcpu, kvm_run); | ||
1688 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
1689 | return -EINTR; | ||
1690 | } | ||
1691 | kvm_resched(vcpu); | ||
1692 | goto again; | ||
1693 | } | ||
1694 | post_kvm_run_save(vcpu, kvm_run); | ||
1695 | return r; | ||
1696 | } | 1603 | } |
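The rewritten asm block in svm_vcpu_run addresses the guest register file through a single %[svm] pointer plus offsetof() constants passed as "i" (immediate) operands and emitted with the %c operand modifier. A standalone illustration of that idiom with made-up structures (x86-64, GCC/Clang extended asm; this is not the kernel layout):

    #include <stddef.h>

    struct demo_vcpu { unsigned long regs[16]; };
    struct demo_svm  { struct demo_vcpu vcpu; void *vmcb; };

    enum { DEMO_RBX = 3 };

    /*
     * Load one saved register slot via an offsetof() immediate, the same
     * way "mov %c[rbx](%[svm]), %%rbx" works in the vmrun asm above.
     */
    static unsigned long load_rbx_slot(struct demo_svm *svm)
    {
            unsigned long val;

            asm ("mov %c[off](%[base]), %[val]"
                 : [val] "=r" (val)
                 : [base] "r" (svm),
                   [off] "i" (offsetof(struct demo_svm, vcpu.regs[DEMO_RBX]))
                 : "memory");
            return val;
    }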
1697 | 1604 | ||
1698 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 1605 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
1699 | { | 1606 | { |
1700 | vcpu->svm->vmcb->save.cr3 = root; | 1607 | struct vcpu_svm *svm = to_svm(vcpu); |
1608 | |||
1609 | svm->vmcb->save.cr3 = root; | ||
1701 | force_new_asid(vcpu); | 1610 | force_new_asid(vcpu); |
1702 | 1611 | ||
1703 | if (vcpu->fpu_active) { | 1612 | if (vcpu->fpu_active) { |
1704 | vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | 1613 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); |
1705 | vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; | 1614 | svm->vmcb->save.cr0 |= X86_CR0_TS; |
1706 | vcpu->fpu_active = 0; | 1615 | vcpu->fpu_active = 0; |
1707 | } | 1616 | } |
1708 | } | 1617 | } |
@@ -1711,26 +1620,27 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, | |||
1711 | unsigned long addr, | 1620 | unsigned long addr, |
1712 | uint32_t err_code) | 1621 | uint32_t err_code) |
1713 | { | 1622 | { |
1714 | uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; | 1623 | struct vcpu_svm *svm = to_svm(vcpu); |
1624 | uint32_t exit_int_info = svm->vmcb->control.exit_int_info; | ||
1715 | 1625 | ||
1716 | ++vcpu->stat.pf_guest; | 1626 | ++vcpu->stat.pf_guest; |
1717 | 1627 | ||
1718 | if (is_page_fault(exit_int_info)) { | 1628 | if (is_page_fault(exit_int_info)) { |
1719 | 1629 | ||
1720 | vcpu->svm->vmcb->control.event_inj_err = 0; | 1630 | svm->vmcb->control.event_inj_err = 0; |
1721 | vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | | 1631 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | |
1722 | SVM_EVTINJ_VALID_ERR | | 1632 | SVM_EVTINJ_VALID_ERR | |
1723 | SVM_EVTINJ_TYPE_EXEPT | | 1633 | SVM_EVTINJ_TYPE_EXEPT | |
1724 | DF_VECTOR; | 1634 | DF_VECTOR; |
1725 | return; | 1635 | return; |
1726 | } | 1636 | } |
1727 | vcpu->cr2 = addr; | 1637 | vcpu->cr2 = addr; |
1728 | vcpu->svm->vmcb->save.cr2 = addr; | 1638 | svm->vmcb->save.cr2 = addr; |
1729 | vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | | 1639 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | |
1730 | SVM_EVTINJ_VALID_ERR | | 1640 | SVM_EVTINJ_VALID_ERR | |
1731 | SVM_EVTINJ_TYPE_EXEPT | | 1641 | SVM_EVTINJ_TYPE_EXEPT | |
1732 | PF_VECTOR; | 1642 | PF_VECTOR; |
1733 | vcpu->svm->vmcb->control.event_inj_err = err_code; | 1643 | svm->vmcb->control.event_inj_err = err_code; |
1734 | } | 1644 | } |
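svm_inject_page_fault builds the VMCB event_inj word directly: vector in the low byte, an event type, an error-code-valid bit and a valid bit, with the error code itself in event_inj_err. A short sketch of assembling such a word (bit positions follow the SVM EVENTINJ layout, the macro names are illustrative):

    #include <stdint.h>

    #define EVTINJ_VEC_MASK    0xffu
    #define EVTINJ_TYPE_EXEPT  (3u << 8)    /* exception event type     */
    #define EVTINJ_VALID_ERR   (1u << 11)   /* an error code is pushed  */
    #define EVTINJ_VALID       (1u << 31)

    static uint32_t make_exception_inj(uint8_t vector, int has_err)
    {
            uint32_t v = (vector & EVTINJ_VEC_MASK) | EVTINJ_TYPE_EXEPT |
                         EVTINJ_VALID;

            if (has_err)
                    v |= EVTINJ_VALID_ERR;
            return v;
    }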
1735 | 1645 | ||
1736 | 1646 | ||
@@ -1757,17 +1667,25 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
1757 | hypercall[3] = 0xc3; | 1667 | hypercall[3] = 0xc3; |
1758 | } | 1668 | } |
1759 | 1669 | ||
1760 | static struct kvm_arch_ops svm_arch_ops = { | 1670 | static void svm_check_processor_compat(void *rtn) |
1671 | { | ||
1672 | *(int *)rtn = 0; | ||
1673 | } | ||
1674 | |||
1675 | static struct kvm_x86_ops svm_x86_ops = { | ||
1761 | .cpu_has_kvm_support = has_svm, | 1676 | .cpu_has_kvm_support = has_svm, |
1762 | .disabled_by_bios = is_disabled, | 1677 | .disabled_by_bios = is_disabled, |
1763 | .hardware_setup = svm_hardware_setup, | 1678 | .hardware_setup = svm_hardware_setup, |
1764 | .hardware_unsetup = svm_hardware_unsetup, | 1679 | .hardware_unsetup = svm_hardware_unsetup, |
1680 | .check_processor_compatibility = svm_check_processor_compat, | ||
1765 | .hardware_enable = svm_hardware_enable, | 1681 | .hardware_enable = svm_hardware_enable, |
1766 | .hardware_disable = svm_hardware_disable, | 1682 | .hardware_disable = svm_hardware_disable, |
1767 | 1683 | ||
1768 | .vcpu_create = svm_create_vcpu, | 1684 | .vcpu_create = svm_create_vcpu, |
1769 | .vcpu_free = svm_free_vcpu, | 1685 | .vcpu_free = svm_free_vcpu, |
1686 | .vcpu_reset = svm_vcpu_reset, | ||
1770 | 1687 | ||
1688 | .prepare_guest_switch = svm_prepare_guest_switch, | ||
1771 | .vcpu_load = svm_vcpu_load, | 1689 | .vcpu_load = svm_vcpu_load, |
1772 | .vcpu_put = svm_vcpu_put, | 1690 | .vcpu_put = svm_vcpu_put, |
1773 | .vcpu_decache = svm_vcpu_decache, | 1691 | .vcpu_decache = svm_vcpu_decache, |
@@ -1778,7 +1696,7 @@ static struct kvm_arch_ops svm_arch_ops = { | |||
1778 | .get_segment_base = svm_get_segment_base, | 1696 | .get_segment_base = svm_get_segment_base, |
1779 | .get_segment = svm_get_segment, | 1697 | .get_segment = svm_get_segment, |
1780 | .set_segment = svm_set_segment, | 1698 | .set_segment = svm_set_segment, |
1781 | .get_cs_db_l_bits = svm_get_cs_db_l_bits, | 1699 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
1782 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 1700 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
1783 | .set_cr0 = svm_set_cr0, | 1701 | .set_cr0 = svm_set_cr0, |
1784 | .set_cr3 = svm_set_cr3, | 1702 | .set_cr3 = svm_set_cr3, |
@@ -1795,26 +1713,30 @@ static struct kvm_arch_ops svm_arch_ops = { | |||
1795 | .get_rflags = svm_get_rflags, | 1713 | .get_rflags = svm_get_rflags, |
1796 | .set_rflags = svm_set_rflags, | 1714 | .set_rflags = svm_set_rflags, |
1797 | 1715 | ||
1798 | .invlpg = svm_invlpg, | ||
1799 | .tlb_flush = svm_flush_tlb, | 1716 | .tlb_flush = svm_flush_tlb, |
1800 | .inject_page_fault = svm_inject_page_fault, | 1717 | .inject_page_fault = svm_inject_page_fault, |
1801 | 1718 | ||
1802 | .inject_gp = svm_inject_gp, | 1719 | .inject_gp = svm_inject_gp, |
1803 | 1720 | ||
1804 | .run = svm_vcpu_run, | 1721 | .run = svm_vcpu_run, |
1722 | .handle_exit = handle_exit, | ||
1805 | .skip_emulated_instruction = skip_emulated_instruction, | 1723 | .skip_emulated_instruction = skip_emulated_instruction, |
1806 | .vcpu_setup = svm_vcpu_setup, | ||
1807 | .patch_hypercall = svm_patch_hypercall, | 1724 | .patch_hypercall = svm_patch_hypercall, |
1725 | .get_irq = svm_get_irq, | ||
1726 | .set_irq = svm_set_irq, | ||
1727 | .inject_pending_irq = svm_intr_assist, | ||
1728 | .inject_pending_vectors = do_interrupt_requests, | ||
1808 | }; | 1729 | }; |
1809 | 1730 | ||
1810 | static int __init svm_init(void) | 1731 | static int __init svm_init(void) |
1811 | { | 1732 | { |
1812 | return kvm_init_arch(&svm_arch_ops, THIS_MODULE); | 1733 | return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm), |
1734 | THIS_MODULE); | ||
1813 | } | 1735 | } |
1814 | 1736 | ||
1815 | static void __exit svm_exit(void) | 1737 | static void __exit svm_exit(void) |
1816 | { | 1738 | { |
1817 | kvm_exit_arch(); | 1739 | kvm_exit_x86(); |
1818 | } | 1740 | } |
1819 | 1741 | ||
1820 | module_init(svm_init) | 1742 | module_init(svm_init) |
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 80628f69916d..4f115a8e45ef 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c | |||
@@ -16,6 +16,8 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include "kvm.h" | 18 | #include "kvm.h" |
19 | #include "x86_emulate.h" | ||
20 | #include "irq.h" | ||
19 | #include "vmx.h" | 21 | #include "vmx.h" |
20 | #include "segment_descriptor.h" | 22 | #include "segment_descriptor.h" |
21 | 23 | ||
@@ -23,7 +25,6 @@ | |||
23 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
24 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
25 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
26 | #include <linux/profile.h> | ||
27 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
28 | 29 | ||
29 | #include <asm/io.h> | 30 | #include <asm/io.h> |
@@ -32,6 +33,39 @@ | |||
32 | MODULE_AUTHOR("Qumranet"); | 33 | MODULE_AUTHOR("Qumranet"); |
33 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
34 | 35 | ||
36 | struct vmcs { | ||
37 | u32 revision_id; | ||
38 | u32 abort; | ||
39 | char data[0]; | ||
40 | }; | ||
41 | |||
42 | struct vcpu_vmx { | ||
43 | struct kvm_vcpu vcpu; | ||
44 | int launched; | ||
45 | u8 fail; | ||
46 | struct kvm_msr_entry *guest_msrs; | ||
47 | struct kvm_msr_entry *host_msrs; | ||
48 | int nmsrs; | ||
49 | int save_nmsrs; | ||
50 | int msr_offset_efer; | ||
51 | #ifdef CONFIG_X86_64 | ||
52 | int msr_offset_kernel_gs_base; | ||
53 | #endif | ||
54 | struct vmcs *vmcs; | ||
55 | struct { | ||
56 | int loaded; | ||
57 | u16 fs_sel, gs_sel, ldt_sel; | ||
58 | int gs_ldt_reload_needed; | ||
59 | int fs_reload_needed; | ||
60 | }host_state; | ||
61 | |||
62 | }; | ||
63 | |||
64 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | ||
65 | { | ||
66 | return container_of(vcpu, struct vcpu_vmx, vcpu); | ||
67 | } | ||
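struct vcpu_vmx embeds struct kvm_vcpu as its first member and to_vmx() recovers the wrapper with container_of(); the SVM side uses the same to_svm() pattern throughout this patch. A freestanding sketch of the idiom with container_of spelled out (demo types only):

    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct demo_vcpu { int cpu; };
    struct demo_vmx  { struct demo_vcpu vcpu; int launched; };

    /* Given the generic vcpu pointer, get back the vendor-specific wrapper. */
    static struct demo_vmx *demo_to_vmx(struct demo_vcpu *vcpu)
    {
            return container_of(vcpu, struct demo_vmx, vcpu);
    }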
68 | |||
35 | static int init_rmode_tss(struct kvm *kvm); | 69 | static int init_rmode_tss(struct kvm *kvm); |
36 | 70 | ||
37 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 71 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
@@ -40,18 +74,17 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | |||
40 | static struct page *vmx_io_bitmap_a; | 74 | static struct page *vmx_io_bitmap_a; |
41 | static struct page *vmx_io_bitmap_b; | 75 | static struct page *vmx_io_bitmap_b; |
42 | 76 | ||
43 | #ifdef CONFIG_X86_64 | ||
44 | #define HOST_IS_64 1 | ||
45 | #else | ||
46 | #define HOST_IS_64 0 | ||
47 | #endif | ||
48 | #define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) | 77 | #define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) |
49 | 78 | ||
50 | static struct vmcs_descriptor { | 79 | static struct vmcs_config { |
51 | int size; | 80 | int size; |
52 | int order; | 81 | int order; |
53 | u32 revision_id; | 82 | u32 revision_id; |
54 | } vmcs_descriptor; | 83 | u32 pin_based_exec_ctrl; |
84 | u32 cpu_based_exec_ctrl; | ||
85 | u32 vmexit_ctrl; | ||
86 | u32 vmentry_ctrl; | ||
87 | } vmcs_config; | ||
55 | 88 | ||
56 | #define VMX_SEGMENT_FIELD(seg) \ | 89 | #define VMX_SEGMENT_FIELD(seg) \ |
57 | [VCPU_SREG_##seg] = { \ | 90 | [VCPU_SREG_##seg] = { \ |
@@ -89,16 +122,32 @@ static const u32 vmx_msr_index[] = { | |||
89 | }; | 122 | }; |
90 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 123 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
91 | 124 | ||
92 | static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) | 125 | static void load_msrs(struct kvm_msr_entry *e, int n) |
126 | { | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < n; ++i) | ||
130 | wrmsrl(e[i].index, e[i].data); | ||
131 | } | ||
132 | |||
133 | static void save_msrs(struct kvm_msr_entry *e, int n) | ||
134 | { | ||
135 | int i; | ||
136 | |||
137 | for (i = 0; i < n; ++i) | ||
138 | rdmsrl(e[i].index, e[i].data); | ||
139 | } | ||
140 | |||
141 | static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr) | ||
93 | { | 142 | { |
94 | return (u64)msr.data & EFER_SAVE_RESTORE_BITS; | 143 | return (u64)msr.data & EFER_SAVE_RESTORE_BITS; |
95 | } | 144 | } |
96 | 145 | ||
97 | static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) | 146 | static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx) |
98 | { | 147 | { |
99 | int efer_offset = vcpu->msr_offset_efer; | 148 | int efer_offset = vmx->msr_offset_efer; |
100 | return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != | 149 | return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) != |
101 | msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); | 150 | msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); |
102 | } | 151 | } |
103 | 152 | ||
104 | static inline int is_page_fault(u32 intr_info) | 153 | static inline int is_page_fault(u32 intr_info) |
@@ -121,23 +170,33 @@ static inline int is_external_interrupt(u32 intr_info) | |||
121 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 170 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
122 | } | 171 | } |
123 | 172 | ||
124 | static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) | 173 | static inline int cpu_has_vmx_tpr_shadow(void) |
174 | { | ||
175 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); | ||
176 | } | ||
177 | |||
178 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | ||
179 | { | ||
180 | return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm))); | ||
181 | } | ||
182 | |||
183 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | ||
125 | { | 184 | { |
126 | int i; | 185 | int i; |
127 | 186 | ||
128 | for (i = 0; i < vcpu->nmsrs; ++i) | 187 | for (i = 0; i < vmx->nmsrs; ++i) |
129 | if (vcpu->guest_msrs[i].index == msr) | 188 | if (vmx->guest_msrs[i].index == msr) |
130 | return i; | 189 | return i; |
131 | return -1; | 190 | return -1; |
132 | } | 191 | } |
133 | 192 | ||
134 | static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) | 193 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
135 | { | 194 | { |
136 | int i; | 195 | int i; |
137 | 196 | ||
138 | i = __find_msr_index(vcpu, msr); | 197 | i = __find_msr_index(vmx, msr); |
139 | if (i >= 0) | 198 | if (i >= 0) |
140 | return &vcpu->guest_msrs[i]; | 199 | return &vmx->guest_msrs[i]; |
141 | return NULL; | 200 | return NULL; |
142 | } | 201 | } |
143 | 202 | ||
@@ -156,23 +215,24 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
156 | 215 | ||
157 | static void __vcpu_clear(void *arg) | 216 | static void __vcpu_clear(void *arg) |
158 | { | 217 | { |
159 | struct kvm_vcpu *vcpu = arg; | 218 | struct vcpu_vmx *vmx = arg; |
160 | int cpu = raw_smp_processor_id(); | 219 | int cpu = raw_smp_processor_id(); |
161 | 220 | ||
162 | if (vcpu->cpu == cpu) | 221 | if (vmx->vcpu.cpu == cpu) |
163 | vmcs_clear(vcpu->vmcs); | 222 | vmcs_clear(vmx->vmcs); |
164 | if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) | 223 | if (per_cpu(current_vmcs, cpu) == vmx->vmcs) |
165 | per_cpu(current_vmcs, cpu) = NULL; | 224 | per_cpu(current_vmcs, cpu) = NULL; |
166 | rdtscll(vcpu->host_tsc); | 225 | rdtscll(vmx->vcpu.host_tsc); |
167 | } | 226 | } |
168 | 227 | ||
169 | static void vcpu_clear(struct kvm_vcpu *vcpu) | 228 | static void vcpu_clear(struct vcpu_vmx *vmx) |
170 | { | 229 | { |
171 | if (vcpu->cpu != raw_smp_processor_id() && vcpu->cpu != -1) | 230 | if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1) |
172 | smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1); | 231 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, |
232 | vmx, 0, 1); | ||
173 | else | 233 | else |
174 | __vcpu_clear(vcpu); | 234 | __vcpu_clear(vmx); |
175 | vcpu->launched = 0; | 235 | vmx->launched = 0; |
176 | } | 236 | } |
177 | 237 | ||
178 | static unsigned long vmcs_readl(unsigned long field) | 238 | static unsigned long vmcs_readl(unsigned long field) |
@@ -282,121 +342,122 @@ static void reload_tss(void) | |||
282 | #endif | 342 | #endif |
283 | } | 343 | } |
284 | 344 | ||
285 | static void load_transition_efer(struct kvm_vcpu *vcpu) | 345 | static void load_transition_efer(struct vcpu_vmx *vmx) |
286 | { | 346 | { |
287 | u64 trans_efer; | 347 | u64 trans_efer; |
288 | int efer_offset = vcpu->msr_offset_efer; | 348 | int efer_offset = vmx->msr_offset_efer; |
289 | 349 | ||
290 | trans_efer = vcpu->host_msrs[efer_offset].data; | 350 | trans_efer = vmx->host_msrs[efer_offset].data; |
291 | trans_efer &= ~EFER_SAVE_RESTORE_BITS; | 351 | trans_efer &= ~EFER_SAVE_RESTORE_BITS; |
292 | trans_efer |= msr_efer_save_restore_bits( | 352 | trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); |
293 | vcpu->guest_msrs[efer_offset]); | ||
294 | wrmsrl(MSR_EFER, trans_efer); | 353 | wrmsrl(MSR_EFER, trans_efer); |
295 | vcpu->stat.efer_reload++; | 354 | vmx->vcpu.stat.efer_reload++; |
296 | } | 355 | } |
297 | 356 | ||
298 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 357 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
299 | { | 358 | { |
300 | struct vmx_host_state *hs = &vcpu->vmx_host_state; | 359 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
301 | 360 | ||
302 | if (hs->loaded) | 361 | if (vmx->host_state.loaded) |
303 | return; | 362 | return; |
304 | 363 | ||
305 | hs->loaded = 1; | 364 | vmx->host_state.loaded = 1; |
306 | /* | 365 | /* |
307 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not | 366 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not |
308 | * allow segment selectors with cpl > 0 or ti == 1. | 367 | * allow segment selectors with cpl > 0 or ti == 1. |
309 | */ | 368 | */ |
310 | hs->ldt_sel = read_ldt(); | 369 | vmx->host_state.ldt_sel = read_ldt(); |
311 | hs->fs_gs_ldt_reload_needed = hs->ldt_sel; | 370 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; |
312 | hs->fs_sel = read_fs(); | 371 | vmx->host_state.fs_sel = read_fs(); |
313 | if (!(hs->fs_sel & 7)) | 372 | if (!(vmx->host_state.fs_sel & 7)) { |
314 | vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); | 373 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); |
315 | else { | 374 | vmx->host_state.fs_reload_needed = 0; |
375 | } else { | ||
316 | vmcs_write16(HOST_FS_SELECTOR, 0); | 376 | vmcs_write16(HOST_FS_SELECTOR, 0); |
317 | hs->fs_gs_ldt_reload_needed = 1; | 377 | vmx->host_state.fs_reload_needed = 1; |
318 | } | 378 | } |
319 | hs->gs_sel = read_gs(); | 379 | vmx->host_state.gs_sel = read_gs(); |
320 | if (!(hs->gs_sel & 7)) | 380 | if (!(vmx->host_state.gs_sel & 7)) |
321 | vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); | 381 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); |
322 | else { | 382 | else { |
323 | vmcs_write16(HOST_GS_SELECTOR, 0); | 383 | vmcs_write16(HOST_GS_SELECTOR, 0); |
324 | hs->fs_gs_ldt_reload_needed = 1; | 384 | vmx->host_state.gs_ldt_reload_needed = 1; |
325 | } | 385 | } |
326 | 386 | ||
327 | #ifdef CONFIG_X86_64 | 387 | #ifdef CONFIG_X86_64 |
328 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); | 388 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); |
329 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); | 389 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); |
330 | #else | 390 | #else |
331 | vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); | 391 | vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); |
332 | vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); | 392 | vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); |
333 | #endif | 393 | #endif |
334 | 394 | ||
335 | #ifdef CONFIG_X86_64 | 395 | #ifdef CONFIG_X86_64 |
336 | if (is_long_mode(vcpu)) { | 396 | if (is_long_mode(&vmx->vcpu)) { |
337 | save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); | 397 | save_msrs(vmx->host_msrs + |
398 | vmx->msr_offset_kernel_gs_base, 1); | ||
338 | } | 399 | } |
339 | #endif | 400 | #endif |
340 | load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); | 401 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); |
341 | if (msr_efer_need_save_restore(vcpu)) | 402 | if (msr_efer_need_save_restore(vmx)) |
342 | load_transition_efer(vcpu); | 403 | load_transition_efer(vmx); |
343 | } | 404 | } |
344 | 405 | ||
345 | static void vmx_load_host_state(struct kvm_vcpu *vcpu) | 406 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
346 | { | 407 | { |
347 | struct vmx_host_state *hs = &vcpu->vmx_host_state; | 408 | unsigned long flags; |
348 | 409 | ||
349 | if (!hs->loaded) | 410 | if (!vmx->host_state.loaded) |
350 | return; | 411 | return; |
351 | 412 | ||
352 | hs->loaded = 0; | 413 | vmx->host_state.loaded = 0; |
353 | if (hs->fs_gs_ldt_reload_needed) { | 414 | if (vmx->host_state.fs_reload_needed) |
354 | load_ldt(hs->ldt_sel); | 415 | load_fs(vmx->host_state.fs_sel); |
355 | load_fs(hs->fs_sel); | 416 | if (vmx->host_state.gs_ldt_reload_needed) { |
417 | load_ldt(vmx->host_state.ldt_sel); | ||
356 | /* | 418 | /* |
357 | * If we have to reload gs, we must take care to | 419 | * If we have to reload gs, we must take care to |
358 | * preserve our gs base. | 420 | * preserve our gs base. |
359 | */ | 421 | */ |
360 | local_irq_disable(); | 422 | local_irq_save(flags); |
361 | load_gs(hs->gs_sel); | 423 | load_gs(vmx->host_state.gs_sel); |
362 | #ifdef CONFIG_X86_64 | 424 | #ifdef CONFIG_X86_64 |
363 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); | 425 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); |
364 | #endif | 426 | #endif |
365 | local_irq_enable(); | 427 | local_irq_restore(flags); |
366 | |||
367 | reload_tss(); | ||
368 | } | 428 | } |
369 | save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); | 429 | reload_tss(); |
370 | load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); | 430 | save_msrs(vmx->guest_msrs, vmx->save_nmsrs); |
371 | if (msr_efer_need_save_restore(vcpu)) | 431 | load_msrs(vmx->host_msrs, vmx->save_nmsrs); |
372 | load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); | 432 | if (msr_efer_need_save_restore(vmx)) |
433 | load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); | ||
373 | } | 434 | } |
374 | 435 | ||
375 | /* | 436 | /* |
376 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 437 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
377 | * vcpu mutex is already taken. | 438 | * vcpu mutex is already taken. |
378 | */ | 439 | */ |
379 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu) | 440 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
380 | { | 441 | { |
381 | u64 phys_addr = __pa(vcpu->vmcs); | 442 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
382 | int cpu; | 443 | u64 phys_addr = __pa(vmx->vmcs); |
383 | u64 tsc_this, delta; | 444 | u64 tsc_this, delta; |
384 | 445 | ||
385 | cpu = get_cpu(); | 446 | if (vcpu->cpu != cpu) { |
386 | 447 | vcpu_clear(vmx); | |
387 | if (vcpu->cpu != cpu) | 448 | kvm_migrate_apic_timer(vcpu); |
388 | vcpu_clear(vcpu); | 449 | } |
389 | 450 | ||
390 | if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { | 451 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { |
391 | u8 error; | 452 | u8 error; |
392 | 453 | ||
393 | per_cpu(current_vmcs, cpu) = vcpu->vmcs; | 454 | per_cpu(current_vmcs, cpu) = vmx->vmcs; |
394 | asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" | 455 | asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" |
395 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 456 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) |
396 | : "cc"); | 457 | : "cc"); |
397 | if (error) | 458 | if (error) |
398 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | 459 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", |
399 | vcpu->vmcs, phys_addr); | 460 | vmx->vmcs, phys_addr); |
400 | } | 461 | } |
401 | 462 | ||
402 | if (vcpu->cpu != cpu) { | 463 | if (vcpu->cpu != cpu) { |
@@ -426,9 +487,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) | |||
426 | 487 | ||
427 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 488 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
428 | { | 489 | { |
429 | vmx_load_host_state(vcpu); | 490 | vmx_load_host_state(to_vmx(vcpu)); |
430 | kvm_put_guest_fpu(vcpu); | 491 | kvm_put_guest_fpu(vcpu); |
431 | put_cpu(); | ||
432 | } | 492 | } |
433 | 493 | ||
434 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 494 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
@@ -436,9 +496,9 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | |||
436 | if (vcpu->fpu_active) | 496 | if (vcpu->fpu_active) |
437 | return; | 497 | return; |
438 | vcpu->fpu_active = 1; | 498 | vcpu->fpu_active = 1; |
439 | vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); | 499 | vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); |
440 | if (vcpu->cr0 & CR0_TS_MASK) | 500 | if (vcpu->cr0 & X86_CR0_TS) |
441 | vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); | 501 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); |
442 | update_exception_bitmap(vcpu); | 502 | update_exception_bitmap(vcpu); |
443 | } | 503 | } |
444 | 504 | ||
@@ -447,13 +507,13 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
447 | if (!vcpu->fpu_active) | 507 | if (!vcpu->fpu_active) |
448 | return; | 508 | return; |
449 | vcpu->fpu_active = 0; | 509 | vcpu->fpu_active = 0; |
450 | vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); | 510 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); |
451 | update_exception_bitmap(vcpu); | 511 | update_exception_bitmap(vcpu); |
452 | } | 512 | } |
453 | 513 | ||
454 | static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) | 514 | static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) |
455 | { | 515 | { |
456 | vcpu_clear(vcpu); | 516 | vcpu_clear(to_vmx(vcpu)); |
457 | } | 517 | } |
458 | 518 | ||
459 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 519 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
@@ -501,59 +561,62 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) | |||
501 | /* | 561 | /* |
502 | * Swap MSR entry in host/guest MSR entry array. | 562 | * Swap MSR entry in host/guest MSR entry array. |
503 | */ | 563 | */ |
504 | void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) | 564 | #ifdef CONFIG_X86_64 |
565 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | ||
505 | { | 566 | { |
506 | struct vmx_msr_entry tmp; | 567 | struct kvm_msr_entry tmp; |
507 | tmp = vcpu->guest_msrs[to]; | 568 | |
508 | vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; | 569 | tmp = vmx->guest_msrs[to]; |
509 | vcpu->guest_msrs[from] = tmp; | 570 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; |
510 | tmp = vcpu->host_msrs[to]; | 571 | vmx->guest_msrs[from] = tmp; |
511 | vcpu->host_msrs[to] = vcpu->host_msrs[from]; | 572 | tmp = vmx->host_msrs[to]; |
512 | vcpu->host_msrs[from] = tmp; | 573 | vmx->host_msrs[to] = vmx->host_msrs[from]; |
574 | vmx->host_msrs[from] = tmp; | ||
513 | } | 575 | } |
576 | #endif | ||
514 | 577 | ||
515 | /* | 578 | /* |
516 | * Set up the vmcs to automatically save and restore system | 579 | * Set up the vmcs to automatically save and restore system |
517 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | 580 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
518 | * mode, as fiddling with msrs is very expensive. | 581 | * mode, as fiddling with msrs is very expensive. |
519 | */ | 582 | */ |
520 | static void setup_msrs(struct kvm_vcpu *vcpu) | 583 | static void setup_msrs(struct vcpu_vmx *vmx) |
521 | { | 584 | { |
522 | int save_nmsrs; | 585 | int save_nmsrs; |
523 | 586 | ||
524 | save_nmsrs = 0; | 587 | save_nmsrs = 0; |
525 | #ifdef CONFIG_X86_64 | 588 | #ifdef CONFIG_X86_64 |
526 | if (is_long_mode(vcpu)) { | 589 | if (is_long_mode(&vmx->vcpu)) { |
527 | int index; | 590 | int index; |
528 | 591 | ||
529 | index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); | 592 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); |
530 | if (index >= 0) | 593 | if (index >= 0) |
531 | move_msr_up(vcpu, index, save_nmsrs++); | 594 | move_msr_up(vmx, index, save_nmsrs++); |
532 | index = __find_msr_index(vcpu, MSR_LSTAR); | 595 | index = __find_msr_index(vmx, MSR_LSTAR); |
533 | if (index >= 0) | 596 | if (index >= 0) |
534 | move_msr_up(vcpu, index, save_nmsrs++); | 597 | move_msr_up(vmx, index, save_nmsrs++); |
535 | index = __find_msr_index(vcpu, MSR_CSTAR); | 598 | index = __find_msr_index(vmx, MSR_CSTAR); |
536 | if (index >= 0) | 599 | if (index >= 0) |
537 | move_msr_up(vcpu, index, save_nmsrs++); | 600 | move_msr_up(vmx, index, save_nmsrs++); |
538 | index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); | 601 | index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); |
539 | if (index >= 0) | 602 | if (index >= 0) |
540 | move_msr_up(vcpu, index, save_nmsrs++); | 603 | move_msr_up(vmx, index, save_nmsrs++); |
541 | /* | 604 | /* |
542 | * MSR_K6_STAR is only needed on long mode guests, and only | 605 | * MSR_K6_STAR is only needed on long mode guests, and only |
543 | * if efer.sce is enabled. | 606 | * if efer.sce is enabled. |
544 | */ | 607 | */ |
545 | index = __find_msr_index(vcpu, MSR_K6_STAR); | 608 | index = __find_msr_index(vmx, MSR_K6_STAR); |
546 | if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE)) | 609 | if ((index >= 0) && (vmx->vcpu.shadow_efer & EFER_SCE)) |
547 | move_msr_up(vcpu, index, save_nmsrs++); | 610 | move_msr_up(vmx, index, save_nmsrs++); |
548 | } | 611 | } |
549 | #endif | 612 | #endif |
550 | vcpu->save_nmsrs = save_nmsrs; | 613 | vmx->save_nmsrs = save_nmsrs; |
551 | 614 | ||
552 | #ifdef CONFIG_X86_64 | 615 | #ifdef CONFIG_X86_64 |
553 | vcpu->msr_offset_kernel_gs_base = | 616 | vmx->msr_offset_kernel_gs_base = |
554 | __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); | 617 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); |
555 | #endif | 618 | #endif |
556 | vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); | 619 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); |
557 | } | 620 | } |
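setup_msrs relies on move_msr_up to swap the MSRs that actually need switching to the front of the guest/host arrays, so load_msrs()/save_msrs() can stop after save_nmsrs entries. A tiny sketch of that compaction step (demo entry type, not the kvm_msr_entry layout):

    struct demo_msr { unsigned index; unsigned long long data; };

    /*
     * Swap entry 'from' into slot 'to', keeping the MSRs that must be
     * saved/restored contiguous at the start of the array.
     */
    static void demo_move_up(struct demo_msr *msrs, int from, int to)
    {
            struct demo_msr tmp = msrs[to];

            msrs[to]   = msrs[from];
            msrs[from] = tmp;
    }

    /* Usage: for each wanted MSR index that is found, swap it to position
     * save_nmsrs++ so the hot path only touches the first save_nmsrs slots. */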
558 | 621 | ||
559 | /* | 622 | /* |
@@ -589,7 +652,7 @@ static void guest_write_tsc(u64 guest_tsc) | |||
589 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 652 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
590 | { | 653 | { |
591 | u64 data; | 654 | u64 data; |
592 | struct vmx_msr_entry *msr; | 655 | struct kvm_msr_entry *msr; |
593 | 656 | ||
594 | if (!pdata) { | 657 | if (!pdata) { |
595 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | 658 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); |
@@ -620,7 +683,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
620 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 683 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
621 | break; | 684 | break; |
622 | default: | 685 | default: |
623 | msr = find_msr_entry(vcpu, msr_index); | 686 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
624 | if (msr) { | 687 | if (msr) { |
625 | data = msr->data; | 688 | data = msr->data; |
626 | break; | 689 | break; |
@@ -639,15 +702,16 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
639 | */ | 702 | */ |
640 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 703 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
641 | { | 704 | { |
642 | struct vmx_msr_entry *msr; | 705 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
706 | struct kvm_msr_entry *msr; | ||
643 | int ret = 0; | 707 | int ret = 0; |
644 | 708 | ||
645 | switch (msr_index) { | 709 | switch (msr_index) { |
646 | #ifdef CONFIG_X86_64 | 710 | #ifdef CONFIG_X86_64 |
647 | case MSR_EFER: | 711 | case MSR_EFER: |
648 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 712 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
649 | if (vcpu->vmx_host_state.loaded) | 713 | if (vmx->host_state.loaded) |
650 | load_transition_efer(vcpu); | 714 | load_transition_efer(vmx); |
651 | break; | 715 | break; |
652 | case MSR_FS_BASE: | 716 | case MSR_FS_BASE: |
653 | vmcs_writel(GUEST_FS_BASE, data); | 717 | vmcs_writel(GUEST_FS_BASE, data); |
@@ -669,11 +733,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
669 | guest_write_tsc(data); | 733 | guest_write_tsc(data); |
670 | break; | 734 | break; |
671 | default: | 735 | default: |
672 | msr = find_msr_entry(vcpu, msr_index); | 736 | msr = find_msr_entry(vmx, msr_index); |
673 | if (msr) { | 737 | if (msr) { |
674 | msr->data = data; | 738 | msr->data = data; |
675 | if (vcpu->vmx_host_state.loaded) | 739 | if (vmx->host_state.loaded) |
676 | load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); | 740 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); |
677 | break; | 741 | break; |
678 | } | 742 | } |
679 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 743 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
@@ -740,6 +804,20 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
740 | return 0; | 804 | return 0; |
741 | } | 805 | } |
742 | 806 | ||
807 | static int vmx_get_irq(struct kvm_vcpu *vcpu) | ||
808 | { | ||
809 | u32 idtv_info_field; | ||
810 | |||
811 | idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD); | ||
812 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | ||
813 | if (is_external_interrupt(idtv_info_field)) | ||
814 | return idtv_info_field & VECTORING_INFO_VECTOR_MASK; | ||
815 | else | ||
816 | printk("pending exception: not handled yet\n"); | ||
817 | } | ||
818 | return -1; | ||
819 | } | ||
820 | |||
743 | static __init int cpu_has_kvm_support(void) | 821 | static __init int cpu_has_kvm_support(void) |
744 | { | 822 | { |
745 | unsigned long ecx = cpuid_ecx(1); | 823 | unsigned long ecx = cpuid_ecx(1); |
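Note: vmx_get_irq(), added in the hunk above, recovers the vector of an external interrupt whose delivery was cut short by a VM exit by decoding the IDT-vectoring information field. A minimal standalone sketch of that decoding (plain C, compiled outside the kernel; the mask names are illustrative stand-ins for the INTR_INFO_*/VECTORING_INFO_* definitions in vmx.h):

    #include <stdint.h>

    #define INFO_VECTOR_MASK   0xffu        /* bits 7:0  - vector               */
    #define INFO_TYPE_MASK     (7u << 8)    /* bits 10:8 - event type           */
    #define INFO_TYPE_EXT_INTR (0u << 8)    /* type 0 == external interrupt     */
    #define INFO_VALID_MASK    (1u << 31)   /* bit 31    - information is valid */

    /* Return the pending external-interrupt vector, or -1 if there is none. */
    static int decode_pending_vector(uint32_t idtv_info)
    {
            if (!(idtv_info & INFO_VALID_MASK))
                    return -1;      /* no event was being delivered        */
            if ((idtv_info & INFO_TYPE_MASK) != INFO_TYPE_EXT_INTR)
                    return -1;      /* exception/NMI: not handled here yet */
            return idtv_info & INFO_VECTOR_MASK;
    }

For example, a word of 0x80000030 (valid bit set, type 0, vector 0x30) decodes to vector 0x30.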
@@ -751,7 +829,10 @@ static __init int vmx_disabled_by_bios(void) | |||
751 | u64 msr; | 829 | u64 msr; |
752 | 830 | ||
753 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 831 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
754 | return (msr & 5) == 1; /* locked but not enabled */ | 832 | return (msr & (MSR_IA32_FEATURE_CONTROL_LOCKED | |
833 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | ||
834 | == MSR_IA32_FEATURE_CONTROL_LOCKED; | ||
835 | /* locked but not enabled */ | ||
755 | } | 836 | } |
756 | 837 | ||
757 | static void hardware_enable(void *garbage) | 838 | static void hardware_enable(void *garbage) |
@@ -761,10 +842,15 @@ static void hardware_enable(void *garbage) | |||
761 | u64 old; | 842 | u64 old; |
762 | 843 | ||
763 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 844 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
764 | if ((old & 5) != 5) | 845 | if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | |
846 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | ||
847 | != (MSR_IA32_FEATURE_CONTROL_LOCKED | | ||
848 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | ||
765 | /* enable and lock */ | 849 | /* enable and lock */ |
766 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 5); | 850 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | |
767 | write_cr4(read_cr4() | CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 851 | MSR_IA32_FEATURE_CONTROL_LOCKED | |
852 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); | ||
853 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | ||
768 | asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) | 854 | asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) |
769 | : "memory", "cc"); | 855 | : "memory", "cc"); |
770 | } | 856 | } |
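Note: the two hunks above replace the magic constant 5 with named IA32_FEATURE_CONTROL bits; roughly, bit 0 is the lock bit and bit 2 allows VMXON, and once the lock bit is set the MSR is read-only until reset, so "locked but not enabled" means firmware disabled VMX. A self-contained sketch of that logic (userspace-style C; helper names invented for illustration):

    #include <stdbool.h>
    #include <stdint.h>

    #define FC_LOCKED        (1ull << 0)   /* MSR can no longer be written */
    #define FC_VMXON_ENABLED (1ull << 2)   /* VMXON allowed                */

    /* Mirrors vmx_disabled_by_bios(): locked with VMX left disabled. */
    static bool vmx_disabled_by_firmware(uint64_t feature_control)
    {
            return (feature_control & (FC_LOCKED | FC_VMXON_ENABLED)) == FC_LOCKED;
    }

    /* Mirrors hardware_enable(): set both bits while the MSR is still unlocked. */
    static uint64_t enable_and_lock(uint64_t feature_control)
    {
            return feature_control | FC_LOCKED | FC_VMXON_ENABLED;
    }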
@@ -774,14 +860,102 @@ static void hardware_disable(void *garbage) | |||
774 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | 860 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); |
775 | } | 861 | } |
776 | 862 | ||
777 | static __init void setup_vmcs_descriptor(void) | 863 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
864 | u32 msr, u32* result) | ||
865 | { | ||
866 | u32 vmx_msr_low, vmx_msr_high; | ||
867 | u32 ctl = ctl_min | ctl_opt; | ||
868 | |||
869 | rdmsr(msr, vmx_msr_low, vmx_msr_high); | ||
870 | |||
871 | ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ | ||
872 | ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ | ||
873 | |||
874 | /* Ensure minimum (required) set of control bits are supported. */ | ||
875 | if (ctl_min & ~ctl) | ||
876 | return -EIO; | ||
877 | |||
878 | *result = ctl; | ||
879 | return 0; | ||
880 | } | ||
881 | |||
882 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | ||
778 | { | 883 | { |
779 | u32 vmx_msr_low, vmx_msr_high; | 884 | u32 vmx_msr_low, vmx_msr_high; |
885 | u32 min, opt; | ||
886 | u32 _pin_based_exec_control = 0; | ||
887 | u32 _cpu_based_exec_control = 0; | ||
888 | u32 _vmexit_control = 0; | ||
889 | u32 _vmentry_control = 0; | ||
890 | |||
891 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
892 | opt = 0; | ||
893 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
894 | &_pin_based_exec_control) < 0) | ||
895 | return -EIO; | ||
896 | |||
897 | min = CPU_BASED_HLT_EXITING | | ||
898 | #ifdef CONFIG_X86_64 | ||
899 | CPU_BASED_CR8_LOAD_EXITING | | ||
900 | CPU_BASED_CR8_STORE_EXITING | | ||
901 | #endif | ||
902 | CPU_BASED_USE_IO_BITMAPS | | ||
903 | CPU_BASED_MOV_DR_EXITING | | ||
904 | CPU_BASED_USE_TSC_OFFSETING; | ||
905 | #ifdef CONFIG_X86_64 | ||
906 | opt = CPU_BASED_TPR_SHADOW; | ||
907 | #else | ||
908 | opt = 0; | ||
909 | #endif | ||
910 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | ||
911 | &_cpu_based_exec_control) < 0) | ||
912 | return -EIO; | ||
913 | #ifdef CONFIG_X86_64 | ||
914 | if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) | ||
915 | _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & | ||
916 | ~CPU_BASED_CR8_STORE_EXITING; | ||
917 | #endif | ||
918 | |||
919 | min = 0; | ||
920 | #ifdef CONFIG_X86_64 | ||
921 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | ||
922 | #endif | ||
923 | opt = 0; | ||
924 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | ||
925 | &_vmexit_control) < 0) | ||
926 | return -EIO; | ||
927 | |||
928 | min = opt = 0; | ||
929 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | ||
930 | &_vmentry_control) < 0) | ||
931 | return -EIO; | ||
780 | 932 | ||
781 | rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); | 933 | rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); |
782 | vmcs_descriptor.size = vmx_msr_high & 0x1fff; | 934 | |
783 | vmcs_descriptor.order = get_order(vmcs_descriptor.size); | 935 | /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ |
784 | vmcs_descriptor.revision_id = vmx_msr_low; | 936 | if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) |
937 | return -EIO; | ||
938 | |||
939 | #ifdef CONFIG_X86_64 | ||
940 | /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ | ||
941 | if (vmx_msr_high & (1u<<16)) | ||
942 | return -EIO; | ||
943 | #endif | ||
944 | |||
945 | /* Require Write-Back (WB) memory type for VMCS accesses. */ | ||
946 | if (((vmx_msr_high >> 18) & 15) != 6) | ||
947 | return -EIO; | ||
948 | |||
949 | vmcs_conf->size = vmx_msr_high & 0x1fff; | ||
950 | vmcs_conf->order = get_order(vmcs_config.size); | ||
951 | vmcs_conf->revision_id = vmx_msr_low; | ||
952 | |||
953 | vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; | ||
954 | vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; | ||
955 | vmcs_conf->vmexit_ctrl = _vmexit_control; | ||
956 | vmcs_conf->vmentry_ctrl = _vmentry_control; | ||
957 | |||
958 | return 0; | ||
785 | } | 959 | } |
786 | 960 | ||
787 | static struct vmcs *alloc_vmcs_cpu(int cpu) | 961 | static struct vmcs *alloc_vmcs_cpu(int cpu) |
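Note: setup_vmcs_config()/adjust_vmx_controls() above rely on the encoding of the MSR_IA32_VMX_*_CTLS capability MSRs: the low 32 bits report control bits that must be 1, the high 32 bits report control bits that are allowed to be 1. A pure-C sketch of the clamping rule, with the rdmsr() replaced by explicit low/high words (illustrative only, not kernel code):

    #include <stdint.h>

    /* Returns 0 and stores the adjusted control word, or -1 when a required
     * ("min") control is not supported by this CPU. */
    static int adjust_controls(uint32_t min, uint32_t opt,
                               uint32_t msr_low, uint32_t msr_high,
                               uint32_t *result)
    {
            uint32_t ctl = min | opt;

            ctl &= msr_high;   /* bit clear in high word => must be zero */
            ctl |= msr_low;    /* bit set in low word    => must be one  */

            if (min & ~ctl)
                    return -1;
            *result = ctl;
            return 0;
    }

Optional ("opt") controls the CPU lacks are silently dropped; only a missing required control makes hardware_setup() fail with -EIO.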
@@ -790,12 +964,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) | |||
790 | struct page *pages; | 964 | struct page *pages; |
791 | struct vmcs *vmcs; | 965 | struct vmcs *vmcs; |
792 | 966 | ||
793 | pages = alloc_pages_node(node, GFP_KERNEL, vmcs_descriptor.order); | 967 | pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); |
794 | if (!pages) | 968 | if (!pages) |
795 | return NULL; | 969 | return NULL; |
796 | vmcs = page_address(pages); | 970 | vmcs = page_address(pages); |
797 | memset(vmcs, 0, vmcs_descriptor.size); | 971 | memset(vmcs, 0, vmcs_config.size); |
798 | vmcs->revision_id = vmcs_descriptor.revision_id; /* vmcs revision id */ | 972 | vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */ |
799 | return vmcs; | 973 | return vmcs; |
800 | } | 974 | } |
801 | 975 | ||
@@ -806,7 +980,7 @@ static struct vmcs *alloc_vmcs(void) | |||
806 | 980 | ||
807 | static void free_vmcs(struct vmcs *vmcs) | 981 | static void free_vmcs(struct vmcs *vmcs) |
808 | { | 982 | { |
809 | free_pages((unsigned long)vmcs, vmcs_descriptor.order); | 983 | free_pages((unsigned long)vmcs, vmcs_config.order); |
810 | } | 984 | } |
811 | 985 | ||
812 | static void free_kvm_area(void) | 986 | static void free_kvm_area(void) |
@@ -817,8 +991,6 @@ static void free_kvm_area(void) | |||
817 | free_vmcs(per_cpu(vmxarea, cpu)); | 991 | free_vmcs(per_cpu(vmxarea, cpu)); |
818 | } | 992 | } |
819 | 993 | ||
820 | extern struct vmcs *alloc_vmcs_cpu(int cpu); | ||
821 | |||
822 | static __init int alloc_kvm_area(void) | 994 | static __init int alloc_kvm_area(void) |
823 | { | 995 | { |
824 | int cpu; | 996 | int cpu; |
@@ -839,7 +1011,8 @@ static __init int alloc_kvm_area(void) | |||
839 | 1011 | ||
840 | static __init int hardware_setup(void) | 1012 | static __init int hardware_setup(void) |
841 | { | 1013 | { |
842 | setup_vmcs_descriptor(); | 1014 | if (setup_vmcs_config(&vmcs_config) < 0) |
1015 | return -EIO; | ||
843 | return alloc_kvm_area(); | 1016 | return alloc_kvm_area(); |
844 | } | 1017 | } |
845 | 1018 | ||
@@ -879,8 +1052,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
879 | flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); | 1052 | flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); |
880 | vmcs_writel(GUEST_RFLAGS, flags); | 1053 | vmcs_writel(GUEST_RFLAGS, flags); |
881 | 1054 | ||
882 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) | | 1055 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | |
883 | (vmcs_readl(CR4_READ_SHADOW) & CR4_VME_MASK)); | 1056 | (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); |
884 | 1057 | ||
885 | update_exception_bitmap(vcpu); | 1058 | update_exception_bitmap(vcpu); |
886 | 1059 | ||
@@ -897,7 +1070,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
897 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); | 1070 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); |
898 | } | 1071 | } |
899 | 1072 | ||
900 | static int rmode_tss_base(struct kvm* kvm) | 1073 | static gva_t rmode_tss_base(struct kvm* kvm) |
901 | { | 1074 | { |
902 | gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3; | 1075 | gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3; |
903 | return base_gfn << PAGE_SHIFT; | 1076 | return base_gfn << PAGE_SHIFT; |
@@ -937,7 +1110,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
937 | flags |= IOPL_MASK | X86_EFLAGS_VM; | 1110 | flags |= IOPL_MASK | X86_EFLAGS_VM; |
938 | 1111 | ||
939 | vmcs_writel(GUEST_RFLAGS, flags); | 1112 | vmcs_writel(GUEST_RFLAGS, flags); |
940 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | CR4_VME_MASK); | 1113 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); |
941 | update_exception_bitmap(vcpu); | 1114 | update_exception_bitmap(vcpu); |
942 | 1115 | ||
943 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); | 1116 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); |
@@ -975,10 +1148,10 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
975 | 1148 | ||
976 | vcpu->shadow_efer |= EFER_LMA; | 1149 | vcpu->shadow_efer |= EFER_LMA; |
977 | 1150 | ||
978 | find_msr_entry(vcpu, MSR_EFER)->data |= EFER_LMA | EFER_LME; | 1151 | find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME; |
979 | vmcs_write32(VM_ENTRY_CONTROLS, | 1152 | vmcs_write32(VM_ENTRY_CONTROLS, |
980 | vmcs_read32(VM_ENTRY_CONTROLS) | 1153 | vmcs_read32(VM_ENTRY_CONTROLS) |
981 | | VM_ENTRY_CONTROLS_IA32E_MASK); | 1154 | | VM_ENTRY_IA32E_MODE); |
982 | } | 1155 | } |
983 | 1156 | ||
984 | static void exit_lmode(struct kvm_vcpu *vcpu) | 1157 | static void exit_lmode(struct kvm_vcpu *vcpu) |
@@ -987,7 +1160,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
987 | 1160 | ||
988 | vmcs_write32(VM_ENTRY_CONTROLS, | 1161 | vmcs_write32(VM_ENTRY_CONTROLS, |
989 | vmcs_read32(VM_ENTRY_CONTROLS) | 1162 | vmcs_read32(VM_ENTRY_CONTROLS) |
990 | & ~VM_ENTRY_CONTROLS_IA32E_MASK); | 1163 | & ~VM_ENTRY_IA32E_MODE); |
991 | } | 1164 | } |
992 | 1165 | ||
993 | #endif | 1166 | #endif |
@@ -1002,17 +1175,17 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1002 | { | 1175 | { |
1003 | vmx_fpu_deactivate(vcpu); | 1176 | vmx_fpu_deactivate(vcpu); |
1004 | 1177 | ||
1005 | if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) | 1178 | if (vcpu->rmode.active && (cr0 & X86_CR0_PE)) |
1006 | enter_pmode(vcpu); | 1179 | enter_pmode(vcpu); |
1007 | 1180 | ||
1008 | if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) | 1181 | if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE)) |
1009 | enter_rmode(vcpu); | 1182 | enter_rmode(vcpu); |
1010 | 1183 | ||
1011 | #ifdef CONFIG_X86_64 | 1184 | #ifdef CONFIG_X86_64 |
1012 | if (vcpu->shadow_efer & EFER_LME) { | 1185 | if (vcpu->shadow_efer & EFER_LME) { |
1013 | if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) | 1186 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) |
1014 | enter_lmode(vcpu); | 1187 | enter_lmode(vcpu); |
1015 | if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK)) | 1188 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) |
1016 | exit_lmode(vcpu); | 1189 | exit_lmode(vcpu); |
1017 | } | 1190 | } |
1018 | #endif | 1191 | #endif |
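Note: vmx_set_cr0() above drives all of the guest mode transitions from a single CR0 write. As a simplified standalone decision table that reports the first matching transition (bit values per the x86 architecture; names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define CR0_PE   (1ul << 0)
    #define CR0_PG   (1ul << 31)
    #define EFER_LME (1ull << 8)

    enum mode_event { NO_CHANGE, ENTER_PROTECTED, ENTER_REAL, ENTER_LONG, EXIT_LONG };

    static enum mode_event cr0_transition(bool rmode_active, bool paging,
                                          uint64_t efer, unsigned long new_cr0)
    {
            if (rmode_active && (new_cr0 & CR0_PE))
                    return ENTER_PROTECTED;   /* leave emulated real mode      */
            if (!rmode_active && !(new_cr0 & CR0_PE))
                    return ENTER_REAL;        /* drop back to vm86 emulation   */
            if ((efer & EFER_LME) && !paging && (new_cr0 & CR0_PG))
                    return ENTER_LONG;        /* paging enabled with LME set   */
            if ((efer & EFER_LME) && paging && !(new_cr0 & CR0_PG))
                    return EXIT_LONG;         /* paging disabled in long mode  */
            return NO_CHANGE;
    }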
@@ -1022,14 +1195,14 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1022 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); | 1195 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); |
1023 | vcpu->cr0 = cr0; | 1196 | vcpu->cr0 = cr0; |
1024 | 1197 | ||
1025 | if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK)) | 1198 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) |
1026 | vmx_fpu_activate(vcpu); | 1199 | vmx_fpu_activate(vcpu); |
1027 | } | 1200 | } |
1028 | 1201 | ||
1029 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 1202 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
1030 | { | 1203 | { |
1031 | vmcs_writel(GUEST_CR3, cr3); | 1204 | vmcs_writel(GUEST_CR3, cr3); |
1032 | if (vcpu->cr0 & CR0_PE_MASK) | 1205 | if (vcpu->cr0 & X86_CR0_PE) |
1033 | vmx_fpu_deactivate(vcpu); | 1206 | vmx_fpu_deactivate(vcpu); |
1034 | } | 1207 | } |
1035 | 1208 | ||
@@ -1045,23 +1218,24 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1045 | 1218 | ||
1046 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1219 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
1047 | { | 1220 | { |
1048 | struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); | 1221 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1222 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | ||
1049 | 1223 | ||
1050 | vcpu->shadow_efer = efer; | 1224 | vcpu->shadow_efer = efer; |
1051 | if (efer & EFER_LMA) { | 1225 | if (efer & EFER_LMA) { |
1052 | vmcs_write32(VM_ENTRY_CONTROLS, | 1226 | vmcs_write32(VM_ENTRY_CONTROLS, |
1053 | vmcs_read32(VM_ENTRY_CONTROLS) | | 1227 | vmcs_read32(VM_ENTRY_CONTROLS) | |
1054 | VM_ENTRY_CONTROLS_IA32E_MASK); | 1228 | VM_ENTRY_IA32E_MODE); |
1055 | msr->data = efer; | 1229 | msr->data = efer; |
1056 | 1230 | ||
1057 | } else { | 1231 | } else { |
1058 | vmcs_write32(VM_ENTRY_CONTROLS, | 1232 | vmcs_write32(VM_ENTRY_CONTROLS, |
1059 | vmcs_read32(VM_ENTRY_CONTROLS) & | 1233 | vmcs_read32(VM_ENTRY_CONTROLS) & |
1060 | ~VM_ENTRY_CONTROLS_IA32E_MASK); | 1234 | ~VM_ENTRY_IA32E_MODE); |
1061 | 1235 | ||
1062 | msr->data = efer & ~EFER_LME; | 1236 | msr->data = efer & ~EFER_LME; |
1063 | } | 1237 | } |
1064 | setup_msrs(vcpu); | 1238 | setup_msrs(vmx); |
1065 | } | 1239 | } |
1066 | 1240 | ||
1067 | #endif | 1241 | #endif |
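Note: the EFER handling above keeps the VM-entry "IA-32e mode guest" control in sync with the guest's EFER.LMA (enter_lmode()/exit_lmode() do the same from the CR0 side). A compact sketch of that invariant (bit positions per the architecture; not kernel code):

    #include <stdint.h>

    #define EFER_LME         (1ull << 8)
    #define EFER_LMA         (1ull << 10)
    #define ENTRY_IA32E_MODE (1u << 9)     /* VM_ENTRY_IA32E_MODE, 0x200 */

    /* The entry control must be set exactly when the guest is in long mode. */
    static uint32_t sync_entry_controls(uint32_t entry_ctls, uint64_t efer)
    {
            if (efer & EFER_LMA)
                    return entry_ctls | ENTRY_IA32E_MODE;
            return entry_ctls & ~ENTRY_IA32E_MODE;
    }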
@@ -1210,17 +1384,6 @@ static int init_rmode_tss(struct kvm* kvm) | |||
1210 | return 1; | 1384 | return 1; |
1211 | } | 1385 | } |
1212 | 1386 | ||
1213 | static void vmcs_write32_fixedbits(u32 msr, u32 vmcs_field, u32 val) | ||
1214 | { | ||
1215 | u32 msr_high, msr_low; | ||
1216 | |||
1217 | rdmsr(msr, msr_low, msr_high); | ||
1218 | |||
1219 | val &= msr_high; | ||
1220 | val |= msr_low; | ||
1221 | vmcs_write32(vmcs_field, val); | ||
1222 | } | ||
1223 | |||
1224 | static void seg_setup(int seg) | 1387 | static void seg_setup(int seg) |
1225 | { | 1388 | { |
1226 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1389 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -1234,7 +1397,7 @@ static void seg_setup(int seg) | |||
1234 | /* | 1397 | /* |
1235 | * Sets up the vmcs for emulated real mode. | 1398 | * Sets up the vmcs for emulated real mode. |
1236 | */ | 1399 | */ |
1237 | static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | 1400 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
1238 | { | 1401 | { |
1239 | u32 host_sysenter_cs; | 1402 | u32 host_sysenter_cs; |
1240 | u32 junk; | 1403 | u32 junk; |
@@ -1243,27 +1406,36 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1243 | int i; | 1406 | int i; |
1244 | int ret = 0; | 1407 | int ret = 0; |
1245 | unsigned long kvm_vmx_return; | 1408 | unsigned long kvm_vmx_return; |
1409 | u64 msr; | ||
1410 | u32 exec_control; | ||
1246 | 1411 | ||
1247 | if (!init_rmode_tss(vcpu->kvm)) { | 1412 | if (!init_rmode_tss(vmx->vcpu.kvm)) { |
1248 | ret = -ENOMEM; | 1413 | ret = -ENOMEM; |
1249 | goto out; | 1414 | goto out; |
1250 | } | 1415 | } |
1251 | 1416 | ||
1252 | memset(vcpu->regs, 0, sizeof(vcpu->regs)); | 1417 | vmx->vcpu.rmode.active = 0; |
1253 | vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); | ||
1254 | vcpu->cr8 = 0; | ||
1255 | vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | ||
1256 | if (vcpu == &vcpu->kvm->vcpus[0]) | ||
1257 | vcpu->apic_base |= MSR_IA32_APICBASE_BSP; | ||
1258 | 1418 | ||
1259 | fx_init(vcpu); | 1419 | vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
1420 | set_cr8(&vmx->vcpu, 0); | ||
1421 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | ||
1422 | if (vmx->vcpu.vcpu_id == 0) | ||
1423 | msr |= MSR_IA32_APICBASE_BSP; | ||
1424 | kvm_set_apic_base(&vmx->vcpu, msr); | ||
1425 | |||
1426 | fx_init(&vmx->vcpu); | ||
1260 | 1427 | ||
1261 | /* | 1428 | /* |
1262 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | 1429 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode |
1263 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. | 1430 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. |
1264 | */ | 1431 | */ |
1265 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 1432 | if (vmx->vcpu.vcpu_id == 0) { |
1266 | vmcs_writel(GUEST_CS_BASE, 0x000f0000); | 1433 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
1434 | vmcs_writel(GUEST_CS_BASE, 0x000f0000); | ||
1435 | } else { | ||
1436 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8); | ||
1437 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12); | ||
1438 | } | ||
1267 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | 1439 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); |
1268 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); | 1440 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); |
1269 | 1441 | ||
@@ -1288,7 +1460,10 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1288 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | 1460 | vmcs_writel(GUEST_SYSENTER_EIP, 0); |
1289 | 1461 | ||
1290 | vmcs_writel(GUEST_RFLAGS, 0x02); | 1462 | vmcs_writel(GUEST_RFLAGS, 0x02); |
1291 | vmcs_writel(GUEST_RIP, 0xfff0); | 1463 | if (vmx->vcpu.vcpu_id == 0) |
1464 | vmcs_writel(GUEST_RIP, 0xfff0); | ||
1465 | else | ||
1466 | vmcs_writel(GUEST_RIP, 0); | ||
1292 | vmcs_writel(GUEST_RSP, 0); | 1467 | vmcs_writel(GUEST_RSP, 0); |
1293 | 1468 | ||
1294 | //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 | 1469 | //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 |
@@ -1316,20 +1491,18 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1316 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | 1491 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
1317 | 1492 | ||
1318 | /* Control */ | 1493 | /* Control */ |
1319 | vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS, | 1494 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, |
1320 | PIN_BASED_VM_EXEC_CONTROL, | 1495 | vmcs_config.pin_based_exec_ctrl); |
1321 | PIN_BASED_EXT_INTR_MASK /* 20.6.1 */ | 1496 | |
1322 | | PIN_BASED_NMI_EXITING /* 20.6.1 */ | 1497 | exec_control = vmcs_config.cpu_based_exec_ctrl; |
1323 | ); | 1498 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { |
1324 | vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS, | 1499 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
1325 | CPU_BASED_VM_EXEC_CONTROL, | 1500 | #ifdef CONFIG_X86_64 |
1326 | CPU_BASED_HLT_EXITING /* 20.6.2 */ | 1501 | exec_control |= CPU_BASED_CR8_STORE_EXITING | |
1327 | | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ | 1502 | CPU_BASED_CR8_LOAD_EXITING; |
1328 | | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ | 1503 | #endif |
1329 | | CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */ | 1504 | } |
1330 | | CPU_BASED_MOV_DR_EXITING | 1505 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); |
1331 | | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ | ||
1332 | ); | ||
1333 | 1506 | ||
1334 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); | 1507 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); |
1335 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); | 1508 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); |
@@ -1377,46 +1550,48 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1377 | u32 index = vmx_msr_index[i]; | 1550 | u32 index = vmx_msr_index[i]; |
1378 | u32 data_low, data_high; | 1551 | u32 data_low, data_high; |
1379 | u64 data; | 1552 | u64 data; |
1380 | int j = vcpu->nmsrs; | 1553 | int j = vmx->nmsrs; |
1381 | 1554 | ||
1382 | if (rdmsr_safe(index, &data_low, &data_high) < 0) | 1555 | if (rdmsr_safe(index, &data_low, &data_high) < 0) |
1383 | continue; | 1556 | continue; |
1384 | if (wrmsr_safe(index, data_low, data_high) < 0) | 1557 | if (wrmsr_safe(index, data_low, data_high) < 0) |
1385 | continue; | 1558 | continue; |
1386 | data = data_low | ((u64)data_high << 32); | 1559 | data = data_low | ((u64)data_high << 32); |
1387 | vcpu->host_msrs[j].index = index; | 1560 | vmx->host_msrs[j].index = index; |
1388 | vcpu->host_msrs[j].reserved = 0; | 1561 | vmx->host_msrs[j].reserved = 0; |
1389 | vcpu->host_msrs[j].data = data; | 1562 | vmx->host_msrs[j].data = data; |
1390 | vcpu->guest_msrs[j] = vcpu->host_msrs[j]; | 1563 | vmx->guest_msrs[j] = vmx->host_msrs[j]; |
1391 | ++vcpu->nmsrs; | 1564 | ++vmx->nmsrs; |
1392 | } | 1565 | } |
1393 | 1566 | ||
1394 | setup_msrs(vcpu); | 1567 | setup_msrs(vmx); |
1395 | 1568 | ||
1396 | vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, | 1569 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); |
1397 | (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ | ||
1398 | 1570 | ||
1399 | /* 22.2.1, 20.8.1 */ | 1571 | /* 22.2.1, 20.8.1 */ |
1400 | vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, | 1572 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); |
1401 | VM_ENTRY_CONTROLS, 0); | 1573 | |
1402 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ | 1574 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ |
1403 | 1575 | ||
1404 | #ifdef CONFIG_X86_64 | 1576 | #ifdef CONFIG_X86_64 |
1405 | vmcs_writel(VIRTUAL_APIC_PAGE_ADDR, 0); | 1577 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
1406 | vmcs_writel(TPR_THRESHOLD, 0); | 1578 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) |
1579 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | ||
1580 | page_to_phys(vmx->vcpu.apic->regs_page)); | ||
1581 | vmcs_write32(TPR_THRESHOLD, 0); | ||
1407 | #endif | 1582 | #endif |
1408 | 1583 | ||
1409 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 1584 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
1410 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 1585 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); |
1411 | 1586 | ||
1412 | vcpu->cr0 = 0x60000010; | 1587 | vmx->vcpu.cr0 = 0x60000010; |
1413 | vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode | 1588 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); // enter rmode |
1414 | vmx_set_cr4(vcpu, 0); | 1589 | vmx_set_cr4(&vmx->vcpu, 0); |
1415 | #ifdef CONFIG_X86_64 | 1590 | #ifdef CONFIG_X86_64 |
1416 | vmx_set_efer(vcpu, 0); | 1591 | vmx_set_efer(&vmx->vcpu, 0); |
1417 | #endif | 1592 | #endif |
1418 | vmx_fpu_activate(vcpu); | 1593 | vmx_fpu_activate(&vmx->vcpu); |
1419 | update_exception_bitmap(vcpu); | 1594 | update_exception_bitmap(&vmx->vcpu); |
1420 | 1595 | ||
1421 | return 0; | 1596 | return 0; |
1422 | 1597 | ||
@@ -1424,6 +1599,13 @@ out: | |||
1424 | return ret; | 1599 | return ret; |
1425 | } | 1600 | } |
1426 | 1601 | ||
1602 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | ||
1603 | { | ||
1604 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
1605 | |||
1606 | vmx_vcpu_setup(vmx); | ||
1607 | } | ||
1608 | |||
1427 | static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) | 1609 | static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) |
1428 | { | 1610 | { |
1429 | u16 ent[2]; | 1611 | u16 ent[2]; |
@@ -1443,8 +1625,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) | |||
1443 | return; | 1625 | return; |
1444 | } | 1626 | } |
1445 | 1627 | ||
1446 | if (kvm_read_guest(vcpu, irq * sizeof(ent), sizeof(ent), &ent) != | 1628 | if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) != |
1447 | sizeof(ent)) { | 1629 | X86EMUL_CONTINUE) { |
1448 | vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__); | 1630 | vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__); |
1449 | return; | 1631 | return; |
1450 | } | 1632 | } |
@@ -1454,9 +1636,9 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) | |||
1454 | ip = vmcs_readl(GUEST_RIP); | 1636 | ip = vmcs_readl(GUEST_RIP); |
1455 | 1637 | ||
1456 | 1638 | ||
1457 | if (kvm_write_guest(vcpu, ss_base + sp - 2, 2, &flags) != 2 || | 1639 | if (emulator_write_emulated(ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE || |
1458 | kvm_write_guest(vcpu, ss_base + sp - 4, 2, &cs) != 2 || | 1640 | emulator_write_emulated(ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE || |
1459 | kvm_write_guest(vcpu, ss_base + sp - 6, 2, &ip) != 2) { | 1641 | emulator_write_emulated(ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) { |
1460 | vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__); | 1642 | vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__); |
1461 | return; | 1643 | return; |
1462 | } | 1644 | } |
@@ -1469,6 +1651,16 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) | |||
1469 | vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6)); | 1651 | vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6)); |
1470 | } | 1652 | } |
1471 | 1653 | ||
1654 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | ||
1655 | { | ||
1656 | if (vcpu->rmode.active) { | ||
1657 | inject_rmode_irq(vcpu, irq); | ||
1658 | return; | ||
1659 | } | ||
1660 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
1661 | irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | ||
1662 | } | ||
1663 | |||
1472 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 1664 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) |
1473 | { | 1665 | { |
1474 | int word_index = __ffs(vcpu->irq_summary); | 1666 | int word_index = __ffs(vcpu->irq_summary); |
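Note: vmx_inject_irq(), split out above, composes the same interruption-information format that vmx_get_irq() decodes earlier: vector in bits 7:0, type 0 (external interrupt) in bits 10:8, valid bit in bit 31. A one-function sketch with illustrative constants:

    #include <stdint.h>

    #define INJ_TYPE_EXT_INTR (0u << 8)    /* event type: external interrupt */
    #define INJ_VALID         (1u << 31)   /* information-valid bit          */

    /* Word written to VM_ENTRY_INTR_INFO_FIELD to inject 'vector' on entry. */
    static uint32_t build_intr_info(uint8_t vector)
    {
            return (uint32_t)vector | INJ_TYPE_EXT_INTR | INJ_VALID;
    }
    /* e.g. build_intr_info(0x20) == 0x80000020 */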
@@ -1478,13 +1670,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | |||
1478 | clear_bit(bit_index, &vcpu->irq_pending[word_index]); | 1670 | clear_bit(bit_index, &vcpu->irq_pending[word_index]); |
1479 | if (!vcpu->irq_pending[word_index]) | 1671 | if (!vcpu->irq_pending[word_index]) |
1480 | clear_bit(word_index, &vcpu->irq_summary); | 1672 | clear_bit(word_index, &vcpu->irq_summary); |
1481 | 1673 | vmx_inject_irq(vcpu, irq); | |
1482 | if (vcpu->rmode.active) { | ||
1483 | inject_rmode_irq(vcpu, irq); | ||
1484 | return; | ||
1485 | } | ||
1486 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
1487 | irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | ||
1488 | } | 1674 | } |
1489 | 1675 | ||
1490 | 1676 | ||
@@ -1568,7 +1754,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1568 | "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); | 1754 | "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); |
1569 | } | 1755 | } |
1570 | 1756 | ||
1571 | if (is_external_interrupt(vect_info)) { | 1757 | if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { |
1572 | int irq = vect_info & VECTORING_INFO_VECTOR_MASK; | 1758 | int irq = vect_info & VECTORING_INFO_VECTOR_MASK; |
1573 | set_bit(irq, vcpu->irq_pending); | 1759 | set_bit(irq, vcpu->irq_pending); |
1574 | set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); | 1760 | set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); |
@@ -1591,29 +1777,28 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1591 | if (is_page_fault(intr_info)) { | 1777 | if (is_page_fault(intr_info)) { |
1592 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 1778 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
1593 | 1779 | ||
1594 | spin_lock(&vcpu->kvm->lock); | 1780 | mutex_lock(&vcpu->kvm->lock); |
1595 | r = kvm_mmu_page_fault(vcpu, cr2, error_code); | 1781 | r = kvm_mmu_page_fault(vcpu, cr2, error_code); |
1596 | if (r < 0) { | 1782 | if (r < 0) { |
1597 | spin_unlock(&vcpu->kvm->lock); | 1783 | mutex_unlock(&vcpu->kvm->lock); |
1598 | return r; | 1784 | return r; |
1599 | } | 1785 | } |
1600 | if (!r) { | 1786 | if (!r) { |
1601 | spin_unlock(&vcpu->kvm->lock); | 1787 | mutex_unlock(&vcpu->kvm->lock); |
1602 | return 1; | 1788 | return 1; |
1603 | } | 1789 | } |
1604 | 1790 | ||
1605 | er = emulate_instruction(vcpu, kvm_run, cr2, error_code); | 1791 | er = emulate_instruction(vcpu, kvm_run, cr2, error_code); |
1606 | spin_unlock(&vcpu->kvm->lock); | 1792 | mutex_unlock(&vcpu->kvm->lock); |
1607 | 1793 | ||
1608 | switch (er) { | 1794 | switch (er) { |
1609 | case EMULATE_DONE: | 1795 | case EMULATE_DONE: |
1610 | return 1; | 1796 | return 1; |
1611 | case EMULATE_DO_MMIO: | 1797 | case EMULATE_DO_MMIO: |
1612 | ++vcpu->stat.mmio_exits; | 1798 | ++vcpu->stat.mmio_exits; |
1613 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
1614 | return 0; | 1799 | return 0; |
1615 | case EMULATE_FAIL: | 1800 | case EMULATE_FAIL: |
1616 | vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); | 1801 | kvm_report_emulation_failure(vcpu, "pagetable"); |
1617 | break; | 1802 | break; |
1618 | default: | 1803 | default: |
1619 | BUG(); | 1804 | BUG(); |
@@ -1653,80 +1838,29 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1653 | return 0; | 1838 | return 0; |
1654 | } | 1839 | } |
1655 | 1840 | ||
1656 | static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) | ||
1657 | { | ||
1658 | u64 inst; | ||
1659 | gva_t rip; | ||
1660 | int countr_size; | ||
1661 | int i, n; | ||
1662 | |||
1663 | if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) { | ||
1664 | countr_size = 2; | ||
1665 | } else { | ||
1666 | u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES); | ||
1667 | |||
1668 | countr_size = (cs_ar & AR_L_MASK) ? 8: | ||
1669 | (cs_ar & AR_DB_MASK) ? 4: 2; | ||
1670 | } | ||
1671 | |||
1672 | rip = vmcs_readl(GUEST_RIP); | ||
1673 | if (countr_size != 8) | ||
1674 | rip += vmcs_readl(GUEST_CS_BASE); | ||
1675 | |||
1676 | n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst); | ||
1677 | |||
1678 | for (i = 0; i < n; i++) { | ||
1679 | switch (((u8*)&inst)[i]) { | ||
1680 | case 0xf0: | ||
1681 | case 0xf2: | ||
1682 | case 0xf3: | ||
1683 | case 0x2e: | ||
1684 | case 0x36: | ||
1685 | case 0x3e: | ||
1686 | case 0x26: | ||
1687 | case 0x64: | ||
1688 | case 0x65: | ||
1689 | case 0x66: | ||
1690 | break; | ||
1691 | case 0x67: | ||
1692 | countr_size = (countr_size == 2) ? 4: (countr_size >> 1); | ||
1693 | default: | ||
1694 | goto done; | ||
1695 | } | ||
1696 | } | ||
1697 | return 0; | ||
1698 | done: | ||
1699 | countr_size *= 8; | ||
1700 | *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); | ||
1701 | //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); | ||
1702 | return 1; | ||
1703 | } | ||
1704 | |||
1705 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1841 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1706 | { | 1842 | { |
1707 | u64 exit_qualification; | 1843 | unsigned long exit_qualification; |
1708 | int size, down, in, string, rep; | 1844 | int size, down, in, string, rep; |
1709 | unsigned port; | 1845 | unsigned port; |
1710 | unsigned long count; | ||
1711 | gva_t address; | ||
1712 | 1846 | ||
1713 | ++vcpu->stat.io_exits; | 1847 | ++vcpu->stat.io_exits; |
1714 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 1848 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
1715 | in = (exit_qualification & 8) != 0; | ||
1716 | size = (exit_qualification & 7) + 1; | ||
1717 | string = (exit_qualification & 16) != 0; | 1849 | string = (exit_qualification & 16) != 0; |
1850 | |||
1851 | if (string) { | ||
1852 | if (emulate_instruction(vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO) | ||
1853 | return 0; | ||
1854 | return 1; | ||
1855 | } | ||
1856 | |||
1857 | size = (exit_qualification & 7) + 1; | ||
1858 | in = (exit_qualification & 8) != 0; | ||
1718 | down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; | 1859 | down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; |
1719 | count = 1; | ||
1720 | rep = (exit_qualification & 32) != 0; | 1860 | rep = (exit_qualification & 32) != 0; |
1721 | port = exit_qualification >> 16; | 1861 | port = exit_qualification >> 16; |
1722 | address = 0; | 1862 | |
1723 | if (string) { | 1863 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); |
1724 | if (rep && !get_io_count(vcpu, &count)) | ||
1725 | return 1; | ||
1726 | address = vmcs_readl(GUEST_LINEAR_ADDRESS); | ||
1727 | } | ||
1728 | return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, | ||
1729 | address, rep, port); | ||
1730 | } | 1864 | } |
1731 | 1865 | ||
1732 | static void | 1866 | static void |
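Note: handle_io() above now decodes only what it needs from the exit qualification and defers string I/O to the instruction emulator. For reference, a standalone decoder for the I/O exit-qualification layout it uses (struct and function are illustrative, not part of the patch):

    #include <stdbool.h>
    #include <stdint.h>

    struct io_exit {
            int      size;    /* access width in bytes: 1, 2 or 4    */
            bool     in;      /* true for IN/INS, false for OUT/OUTS */
            bool     string;  /* string instruction (INS/OUTS)       */
            bool     rep;     /* REP prefix present                  */
            uint16_t port;    /* I/O port number                     */
    };

    static struct io_exit decode_io_qualification(unsigned long q)
    {
            struct io_exit io = {
                    .size   = (int)(q & 7) + 1,
                    .in     = (q & 8) != 0,
                    .string = (q & 16) != 0,
                    .rep    = (q & 32) != 0,
                    .port   = (uint16_t)((q >> 16) & 0xffff),
            };
            return io;
    }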
@@ -1743,11 +1877,11 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
1743 | 1877 | ||
1744 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1878 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1745 | { | 1879 | { |
1746 | u64 exit_qualification; | 1880 | unsigned long exit_qualification; |
1747 | int cr; | 1881 | int cr; |
1748 | int reg; | 1882 | int reg; |
1749 | 1883 | ||
1750 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 1884 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
1751 | cr = exit_qualification & 15; | 1885 | cr = exit_qualification & 15; |
1752 | reg = (exit_qualification >> 8) & 15; | 1886 | reg = (exit_qualification >> 8) & 15; |
1753 | switch ((exit_qualification >> 4) & 3) { | 1887 | switch ((exit_qualification >> 4) & 3) { |
@@ -1772,13 +1906,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1772 | vcpu_load_rsp_rip(vcpu); | 1906 | vcpu_load_rsp_rip(vcpu); |
1773 | set_cr8(vcpu, vcpu->regs[reg]); | 1907 | set_cr8(vcpu, vcpu->regs[reg]); |
1774 | skip_emulated_instruction(vcpu); | 1908 | skip_emulated_instruction(vcpu); |
1775 | return 1; | 1909 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; |
1910 | return 0; | ||
1776 | }; | 1911 | }; |
1777 | break; | 1912 | break; |
1778 | case 2: /* clts */ | 1913 | case 2: /* clts */ |
1779 | vcpu_load_rsp_rip(vcpu); | 1914 | vcpu_load_rsp_rip(vcpu); |
1780 | vmx_fpu_deactivate(vcpu); | 1915 | vmx_fpu_deactivate(vcpu); |
1781 | vcpu->cr0 &= ~CR0_TS_MASK; | 1916 | vcpu->cr0 &= ~X86_CR0_TS; |
1782 | vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); | 1917 | vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); |
1783 | vmx_fpu_activate(vcpu); | 1918 | vmx_fpu_activate(vcpu); |
1784 | skip_emulated_instruction(vcpu); | 1919 | skip_emulated_instruction(vcpu); |
@@ -1793,7 +1928,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1793 | return 1; | 1928 | return 1; |
1794 | case 8: | 1929 | case 8: |
1795 | vcpu_load_rsp_rip(vcpu); | 1930 | vcpu_load_rsp_rip(vcpu); |
1796 | vcpu->regs[reg] = vcpu->cr8; | 1931 | vcpu->regs[reg] = get_cr8(vcpu); |
1797 | vcpu_put_rsp_rip(vcpu); | 1932 | vcpu_put_rsp_rip(vcpu); |
1798 | skip_emulated_instruction(vcpu); | 1933 | skip_emulated_instruction(vcpu); |
1799 | return 1; | 1934 | return 1; |
@@ -1808,14 +1943,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1808 | break; | 1943 | break; |
1809 | } | 1944 | } |
1810 | kvm_run->exit_reason = 0; | 1945 | kvm_run->exit_reason = 0; |
1811 | printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n", | 1946 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
1812 | (int)(exit_qualification >> 4) & 3, cr); | 1947 | (int)(exit_qualification >> 4) & 3, cr); |
1813 | return 0; | 1948 | return 0; |
1814 | } | 1949 | } |
1815 | 1950 | ||
1816 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1951 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1817 | { | 1952 | { |
1818 | u64 exit_qualification; | 1953 | unsigned long exit_qualification; |
1819 | unsigned long val; | 1954 | unsigned long val; |
1820 | int dr, reg; | 1955 | int dr, reg; |
1821 | 1956 | ||
@@ -1823,7 +1958,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1823 | * FIXME: this code assumes the host is debugging the guest. | 1958 | * FIXME: this code assumes the host is debugging the guest. |
1824 | * need to deal with guest debugging itself too. | 1959 | * need to deal with guest debugging itself too. |
1825 | */ | 1960 | */ |
1826 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 1961 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
1827 | dr = exit_qualification & 7; | 1962 | dr = exit_qualification & 7; |
1828 | reg = (exit_qualification >> 8) & 15; | 1963 | reg = (exit_qualification >> 8) & 15; |
1829 | vcpu_load_rsp_rip(vcpu); | 1964 | vcpu_load_rsp_rip(vcpu); |
@@ -1886,19 +2021,21 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1886 | return 1; | 2021 | return 1; |
1887 | } | 2022 | } |
1888 | 2023 | ||
1889 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 2024 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, |
1890 | struct kvm_run *kvm_run) | 2025 | struct kvm_run *kvm_run) |
1891 | { | 2026 | { |
1892 | kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; | 2027 | return 1; |
1893 | kvm_run->cr8 = vcpu->cr8; | ||
1894 | kvm_run->apic_base = vcpu->apic_base; | ||
1895 | kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && | ||
1896 | vcpu->irq_summary == 0); | ||
1897 | } | 2028 | } |
1898 | 2029 | ||
1899 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, | 2030 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, |
1900 | struct kvm_run *kvm_run) | 2031 | struct kvm_run *kvm_run) |
1901 | { | 2032 | { |
2033 | u32 cpu_based_vm_exec_control; | ||
2034 | |||
2035 | /* clear pending irq */ | ||
2036 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2037 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2038 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
1902 | /* | 2039 | /* |
1903 | * If the user space waits to inject interrupts, exit as soon as | 2040 | * If the user space waits to inject interrupts, exit as soon as |
1904 | * possible | 2041 | * possible |
@@ -1943,6 +2080,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
1943 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, | 2080 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, |
1944 | [EXIT_REASON_HLT] = handle_halt, | 2081 | [EXIT_REASON_HLT] = handle_halt, |
1945 | [EXIT_REASON_VMCALL] = handle_vmcall, | 2082 | [EXIT_REASON_VMCALL] = handle_vmcall, |
2083 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold | ||
1946 | }; | 2084 | }; |
1947 | 2085 | ||
1948 | static const int kvm_vmx_max_exit_handlers = | 2086 | static const int kvm_vmx_max_exit_handlers = |
@@ -1956,6 +2094,14 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1956 | { | 2094 | { |
1957 | u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 2095 | u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
1958 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | 2096 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); |
2097 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2098 | |||
2099 | if (unlikely(vmx->fail)) { | ||
2100 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
2101 | kvm_run->fail_entry.hardware_entry_failure_reason | ||
2102 | = vmcs_read32(VM_INSTRUCTION_ERROR); | ||
2103 | return 0; | ||
2104 | } | ||
1959 | 2105 | ||
1960 | if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && | 2106 | if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && |
1961 | exit_reason != EXIT_REASON_EXCEPTION_NMI ) | 2107 | exit_reason != EXIT_REASON_EXCEPTION_NMI ) |
@@ -1971,57 +2117,91 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1971 | return 0; | 2117 | return 0; |
1972 | } | 2118 | } |
1973 | 2119 | ||
1974 | /* | 2120 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
1975 | * Check if userspace requested an interrupt window, and that the | ||
1976 | * interrupt window is open. | ||
1977 | * | ||
1978 | * No need to exit to userspace if we already have an interrupt queued. | ||
1979 | */ | ||
1980 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | ||
1981 | struct kvm_run *kvm_run) | ||
1982 | { | 2121 | { |
1983 | return (!vcpu->irq_summary && | ||
1984 | kvm_run->request_interrupt_window && | ||
1985 | vcpu->interrupt_window_open && | ||
1986 | (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); | ||
1987 | } | 2122 | } |
1988 | 2123 | ||
1989 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 2124 | static void update_tpr_threshold(struct kvm_vcpu *vcpu) |
1990 | { | 2125 | { |
2126 | int max_irr, tpr; | ||
2127 | |||
2128 | if (!vm_need_tpr_shadow(vcpu->kvm)) | ||
2129 | return; | ||
2130 | |||
2131 | if (!kvm_lapic_enabled(vcpu) || | ||
2132 | ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) { | ||
2133 | vmcs_write32(TPR_THRESHOLD, 0); | ||
2134 | return; | ||
2135 | } | ||
2136 | |||
2137 | tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4; | ||
2138 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); | ||
1991 | } | 2139 | } |
1992 | 2140 | ||
1993 | static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2141 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
1994 | { | 2142 | { |
1995 | u8 fail; | 2143 | u32 cpu_based_vm_exec_control; |
1996 | int r; | ||
1997 | 2144 | ||
1998 | preempted: | 2145 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
1999 | if (vcpu->guest_debug.enabled) | 2146 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
2000 | kvm_guest_debug_pre(vcpu); | 2147 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2148 | } | ||
2001 | 2149 | ||
2002 | again: | 2150 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) |
2003 | if (!vcpu->mmio_read_completed) | 2151 | { |
2004 | do_interrupt_requests(vcpu, kvm_run); | 2152 | u32 idtv_info_field, intr_info_field; |
2153 | int has_ext_irq, interrupt_window_open; | ||
2154 | int vector; | ||
2005 | 2155 | ||
2006 | vmx_save_host_state(vcpu); | 2156 | kvm_inject_pending_timer_irqs(vcpu); |
2007 | kvm_load_guest_fpu(vcpu); | 2157 | update_tpr_threshold(vcpu); |
2008 | 2158 | ||
2009 | r = kvm_mmu_reload(vcpu); | 2159 | has_ext_irq = kvm_cpu_has_interrupt(vcpu); |
2010 | if (unlikely(r)) | 2160 | intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); |
2011 | goto out; | 2161 | idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
2162 | if (intr_info_field & INTR_INFO_VALID_MASK) { | ||
2163 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | ||
2164 | /* TODO: fault when IDT_Vectoring */ | ||
2165 | printk(KERN_ERR "Fault when IDT_Vectoring\n"); | ||
2166 | } | ||
2167 | if (has_ext_irq) | ||
2168 | enable_irq_window(vcpu); | ||
2169 | return; | ||
2170 | } | ||
2171 | if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { | ||
2172 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); | ||
2173 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
2174 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); | ||
2175 | |||
2176 | if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) | ||
2177 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
2178 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | ||
2179 | if (unlikely(has_ext_irq)) | ||
2180 | enable_irq_window(vcpu); | ||
2181 | return; | ||
2182 | } | ||
2183 | if (!has_ext_irq) | ||
2184 | return; | ||
2185 | interrupt_window_open = | ||
2186 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2187 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); | ||
2188 | if (interrupt_window_open) { | ||
2189 | vector = kvm_cpu_get_interrupt(vcpu); | ||
2190 | vmx_inject_irq(vcpu, vector); | ||
2191 | kvm_timer_intr_post(vcpu, vector); | ||
2192 | } else | ||
2193 | enable_irq_window(vcpu); | ||
2194 | } | ||
2195 | |||
2196 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2197 | { | ||
2198 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2012 | 2199 | ||
2013 | /* | 2200 | /* |
2014 | * Loading guest fpu may have cleared host cr0.ts | 2201 | * Loading guest fpu may have cleared host cr0.ts |
2015 | */ | 2202 | */ |
2016 | vmcs_writel(HOST_CR0, read_cr0()); | 2203 | vmcs_writel(HOST_CR0, read_cr0()); |
2017 | 2204 | ||
2018 | local_irq_disable(); | ||
2019 | |||
2020 | vcpu->guest_mode = 1; | ||
2021 | if (vcpu->requests) | ||
2022 | if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) | ||
2023 | vmx_flush_tlb(vcpu); | ||
2024 | |||
2025 | asm ( | 2205 | asm ( |
2026 | /* Store host registers */ | 2206 | /* Store host registers */ |
2027 | #ifdef CONFIG_X86_64 | 2207 | #ifdef CONFIG_X86_64 |
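Note: update_tpr_threshold(), added in the hunk above, programs the TPR threshold so the guest takes a TPR_BELOW_THRESHOLD exit exactly when lowering its task priority would unmask the highest pending interrupt; both sides are compared by priority class (vector >> 4). A standalone sketch of the arithmetic (illustrative; the lapic-enabled check is omitted):

    #include <stdint.h>

    /* max_irr: highest pending vector, or -1 if none; cr8: guest TPR (low 4 bits). */
    static uint32_t tpr_threshold(int max_irr, uint8_t cr8)
    {
            uint32_t tpr = (uint32_t)(cr8 & 0x0f) << 4;  /* scale CR8 into vector space */

            if (max_irr < 0)
                    return 0;                            /* nothing pending: no TPR exits */
            return (max_irr > (int)tpr) ? tpr >> 4 : (uint32_t)max_irr >> 4;
    }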
@@ -2115,8 +2295,8 @@ again: | |||
2115 | "pop %%ecx; popa \n\t" | 2295 | "pop %%ecx; popa \n\t" |
2116 | #endif | 2296 | #endif |
2117 | "setbe %0 \n\t" | 2297 | "setbe %0 \n\t" |
2118 | : "=q" (fail) | 2298 | : "=q" (vmx->fail) |
2119 | : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), | 2299 | : "r"(vmx->launched), "d"((unsigned long)HOST_RSP), |
2120 | "c"(vcpu), | 2300 | "c"(vcpu), |
2121 | [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), | 2301 | [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), |
2122 | [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), | 2302 | [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), |
@@ -2138,59 +2318,10 @@ again: | |||
2138 | [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) | 2318 | [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) |
2139 | : "cc", "memory" ); | 2319 | : "cc", "memory" ); |
2140 | 2320 | ||
2141 | vcpu->guest_mode = 0; | ||
2142 | local_irq_enable(); | ||
2143 | |||
2144 | ++vcpu->stat.exits; | ||
2145 | |||
2146 | vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; | 2321 | vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; |
2147 | 2322 | ||
2148 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 2323 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
2149 | 2324 | vmx->launched = 1; | |
2150 | if (unlikely(fail)) { | ||
2151 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
2152 | kvm_run->fail_entry.hardware_entry_failure_reason | ||
2153 | = vmcs_read32(VM_INSTRUCTION_ERROR); | ||
2154 | r = 0; | ||
2155 | goto out; | ||
2156 | } | ||
2157 | /* | ||
2158 | * Profile KVM exit RIPs: | ||
2159 | */ | ||
2160 | if (unlikely(prof_on == KVM_PROFILING)) | ||
2161 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); | ||
2162 | |||
2163 | vcpu->launched = 1; | ||
2164 | r = kvm_handle_exit(kvm_run, vcpu); | ||
2165 | if (r > 0) { | ||
2166 | /* Give scheduler a change to reschedule. */ | ||
2167 | if (signal_pending(current)) { | ||
2168 | r = -EINTR; | ||
2169 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
2170 | ++vcpu->stat.signal_exits; | ||
2171 | goto out; | ||
2172 | } | ||
2173 | |||
2174 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | ||
2175 | r = -EINTR; | ||
2176 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
2177 | ++vcpu->stat.request_irq_exits; | ||
2178 | goto out; | ||
2179 | } | ||
2180 | if (!need_resched()) { | ||
2181 | ++vcpu->stat.light_exits; | ||
2182 | goto again; | ||
2183 | } | ||
2184 | } | ||
2185 | |||
2186 | out: | ||
2187 | if (r > 0) { | ||
2188 | kvm_resched(vcpu); | ||
2189 | goto preempted; | ||
2190 | } | ||
2191 | |||
2192 | post_kvm_run_save(vcpu, kvm_run); | ||
2193 | return r; | ||
2194 | } | 2325 | } |
2195 | 2326 | ||
2196 | static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, | 2327 | static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, |
@@ -2225,67 +2356,118 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, | |||
2225 | 2356 | ||
2226 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | 2357 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) |
2227 | { | 2358 | { |
2228 | if (vcpu->vmcs) { | 2359 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2229 | on_each_cpu(__vcpu_clear, vcpu, 0, 1); | 2360 | |
2230 | free_vmcs(vcpu->vmcs); | 2361 | if (vmx->vmcs) { |
2231 | vcpu->vmcs = NULL; | 2362 | on_each_cpu(__vcpu_clear, vmx, 0, 1); |
2363 | free_vmcs(vmx->vmcs); | ||
2364 | vmx->vmcs = NULL; | ||
2232 | } | 2365 | } |
2233 | } | 2366 | } |
2234 | 2367 | ||
2235 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | 2368 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) |
2236 | { | 2369 | { |
2370 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2371 | |||
2237 | vmx_free_vmcs(vcpu); | 2372 | vmx_free_vmcs(vcpu); |
2373 | kfree(vmx->host_msrs); | ||
2374 | kfree(vmx->guest_msrs); | ||
2375 | kvm_vcpu_uninit(vcpu); | ||
2376 | kmem_cache_free(kvm_vcpu_cache, vmx); | ||
2238 | } | 2377 | } |
2239 | 2378 | ||
2240 | static int vmx_create_vcpu(struct kvm_vcpu *vcpu) | 2379 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
2241 | { | 2380 | { |
2242 | struct vmcs *vmcs; | 2381 | int err; |
2382 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | ||
2383 | int cpu; | ||
2243 | 2384 | ||
2244 | vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 2385 | if (!vmx) |
2245 | if (!vcpu->guest_msrs) | 2386 | return ERR_PTR(-ENOMEM); |
2246 | return -ENOMEM; | ||
2247 | 2387 | ||
2248 | vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 2388 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
2249 | if (!vcpu->host_msrs) | 2389 | if (err) |
2250 | goto out_free_guest_msrs; | 2390 | goto free_vcpu; |
2251 | 2391 | ||
2252 | vmcs = alloc_vmcs(); | 2392 | if (irqchip_in_kernel(kvm)) { |
2253 | if (!vmcs) | 2393 | err = kvm_create_lapic(&vmx->vcpu); |
2254 | goto out_free_msrs; | 2394 | if (err < 0) |
2395 | goto free_vcpu; | ||
2396 | } | ||
2255 | 2397 | ||
2256 | vmcs_clear(vmcs); | 2398 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
2257 | vcpu->vmcs = vmcs; | 2399 | if (!vmx->guest_msrs) { |
2258 | vcpu->launched = 0; | 2400 | err = -ENOMEM; |
2401 | goto uninit_vcpu; | ||
2402 | } | ||
2259 | 2403 | ||
2260 | return 0; | 2404 | vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
2405 | if (!vmx->host_msrs) | ||
2406 | goto free_guest_msrs; | ||
2261 | 2407 | ||
2262 | out_free_msrs: | 2408 | vmx->vmcs = alloc_vmcs(); |
2263 | kfree(vcpu->host_msrs); | 2409 | if (!vmx->vmcs) |
2264 | vcpu->host_msrs = NULL; | 2410 | goto free_msrs; |
2265 | 2411 | ||
2266 | out_free_guest_msrs: | 2412 | vmcs_clear(vmx->vmcs); |
2267 | kfree(vcpu->guest_msrs); | ||
2268 | vcpu->guest_msrs = NULL; | ||
2269 | 2413 | ||
2270 | return -ENOMEM; | 2414 | cpu = get_cpu(); |
2415 | vmx_vcpu_load(&vmx->vcpu, cpu); | ||
2416 | err = vmx_vcpu_setup(vmx); | ||
2417 | vmx_vcpu_put(&vmx->vcpu); | ||
2418 | put_cpu(); | ||
2419 | if (err) | ||
2420 | goto free_vmcs; | ||
2421 | |||
2422 | return &vmx->vcpu; | ||
2423 | |||
2424 | free_vmcs: | ||
2425 | free_vmcs(vmx->vmcs); | ||
2426 | free_msrs: | ||
2427 | kfree(vmx->host_msrs); | ||
2428 | free_guest_msrs: | ||
2429 | kfree(vmx->guest_msrs); | ||
2430 | uninit_vcpu: | ||
2431 | kvm_vcpu_uninit(&vmx->vcpu); | ||
2432 | free_vcpu: | ||
2433 | kmem_cache_free(kvm_vcpu_cache, vmx); | ||
2434 | return ERR_PTR(err); | ||
2435 | } | ||
2436 | |||
2437 | static void __init vmx_check_processor_compat(void *rtn) | ||
2438 | { | ||
2439 | struct vmcs_config vmcs_conf; | ||
2440 | |||
2441 | *(int *)rtn = 0; | ||
2442 | if (setup_vmcs_config(&vmcs_conf) < 0) | ||
2443 | *(int *)rtn = -EIO; | ||
2444 | if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { | ||
2445 | printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", | ||
2446 | smp_processor_id()); | ||
2447 | *(int *)rtn = -EIO; | ||
2448 | } | ||
2271 | } | 2449 | } |
2272 | 2450 | ||
2273 | static struct kvm_arch_ops vmx_arch_ops = { | 2451 | static struct kvm_x86_ops vmx_x86_ops = { |
2274 | .cpu_has_kvm_support = cpu_has_kvm_support, | 2452 | .cpu_has_kvm_support = cpu_has_kvm_support, |
2275 | .disabled_by_bios = vmx_disabled_by_bios, | 2453 | .disabled_by_bios = vmx_disabled_by_bios, |
2276 | .hardware_setup = hardware_setup, | 2454 | .hardware_setup = hardware_setup, |
2277 | .hardware_unsetup = hardware_unsetup, | 2455 | .hardware_unsetup = hardware_unsetup, |
2456 | .check_processor_compatibility = vmx_check_processor_compat, | ||
2278 | .hardware_enable = hardware_enable, | 2457 | .hardware_enable = hardware_enable, |
2279 | .hardware_disable = hardware_disable, | 2458 | .hardware_disable = hardware_disable, |
2280 | 2459 | ||
2281 | .vcpu_create = vmx_create_vcpu, | 2460 | .vcpu_create = vmx_create_vcpu, |
2282 | .vcpu_free = vmx_free_vcpu, | 2461 | .vcpu_free = vmx_free_vcpu, |
2462 | .vcpu_reset = vmx_vcpu_reset, | ||
2283 | 2463 | ||
2464 | .prepare_guest_switch = vmx_save_host_state, | ||
2284 | .vcpu_load = vmx_vcpu_load, | 2465 | .vcpu_load = vmx_vcpu_load, |
2285 | .vcpu_put = vmx_vcpu_put, | 2466 | .vcpu_put = vmx_vcpu_put, |
2286 | .vcpu_decache = vmx_vcpu_decache, | 2467 | .vcpu_decache = vmx_vcpu_decache, |
2287 | 2468 | ||
2288 | .set_guest_debug = set_guest_debug, | 2469 | .set_guest_debug = set_guest_debug, |
2470 | .guest_debug_pre = kvm_guest_debug_pre, | ||
2289 | .get_msr = vmx_get_msr, | 2471 | .get_msr = vmx_get_msr, |
2290 | .set_msr = vmx_set_msr, | 2472 | .set_msr = vmx_set_msr, |
2291 | .get_segment_base = vmx_get_segment_base, | 2473 | .get_segment_base = vmx_get_segment_base, |
@@ -2314,9 +2496,13 @@ static struct kvm_arch_ops vmx_arch_ops = { | |||
2314 | .inject_gp = vmx_inject_gp, | 2496 | .inject_gp = vmx_inject_gp, |
2315 | 2497 | ||
2316 | .run = vmx_vcpu_run, | 2498 | .run = vmx_vcpu_run, |
2499 | .handle_exit = kvm_handle_exit, | ||
2317 | .skip_emulated_instruction = skip_emulated_instruction, | 2500 | .skip_emulated_instruction = skip_emulated_instruction, |
2318 | .vcpu_setup = vmx_vcpu_setup, | ||
2319 | .patch_hypercall = vmx_patch_hypercall, | 2501 | .patch_hypercall = vmx_patch_hypercall, |
2502 | .get_irq = vmx_get_irq, | ||
2503 | .set_irq = vmx_inject_irq, | ||
2504 | .inject_pending_irq = vmx_intr_assist, | ||
2505 | .inject_pending_vectors = do_interrupt_requests, | ||
2320 | }; | 2506 | }; |
2321 | 2507 | ||
2322 | static int __init vmx_init(void) | 2508 | static int __init vmx_init(void) |
@@ -2347,7 +2533,7 @@ static int __init vmx_init(void) | |||
2347 | memset(iova, 0xff, PAGE_SIZE); | 2533 | memset(iova, 0xff, PAGE_SIZE); |
2348 | kunmap(vmx_io_bitmap_b); | 2534 | kunmap(vmx_io_bitmap_b); |
2349 | 2535 | ||
2350 | r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); | 2536 | r = kvm_init_x86(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); |
2351 | if (r) | 2537 | if (r) |
2352 | goto out1; | 2538 | goto out1; |
2353 | 2539 | ||
@@ -2365,7 +2551,7 @@ static void __exit vmx_exit(void) | |||
2365 | __free_page(vmx_io_bitmap_b); | 2551 | __free_page(vmx_io_bitmap_b); |
2366 | __free_page(vmx_io_bitmap_a); | 2552 | __free_page(vmx_io_bitmap_a); |
2367 | 2553 | ||
2368 | kvm_exit_arch(); | 2554 | kvm_exit_x86(); |
2369 | } | 2555 | } |
2370 | 2556 | ||
2371 | module_init(vmx_init) | 2557 | module_init(vmx_init) |
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h index d0dc93df411b..fd4e14666088 100644 --- a/drivers/kvm/vmx.h +++ b/drivers/kvm/vmx.h | |||
@@ -25,29 +25,36 @@ | |||
25 | * | 25 | * |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 | 28 | #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 |
29 | #define CPU_BASED_USE_TSC_OFFSETING 0x00000008 | 29 | #define CPU_BASED_USE_TSC_OFFSETING 0x00000008 |
30 | #define CPU_BASED_HLT_EXITING 0x00000080 | 30 | #define CPU_BASED_HLT_EXITING 0x00000080 |
31 | #define CPU_BASED_INVDPG_EXITING 0x00000200 | 31 | #define CPU_BASED_INVLPG_EXITING 0x00000200 |
32 | #define CPU_BASED_MWAIT_EXITING 0x00000400 | 32 | #define CPU_BASED_MWAIT_EXITING 0x00000400 |
33 | #define CPU_BASED_RDPMC_EXITING 0x00000800 | 33 | #define CPU_BASED_RDPMC_EXITING 0x00000800 |
34 | #define CPU_BASED_RDTSC_EXITING 0x00001000 | 34 | #define CPU_BASED_RDTSC_EXITING 0x00001000 |
35 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 | 35 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 |
36 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 | 36 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 |
37 | #define CPU_BASED_TPR_SHADOW 0x00200000 | 37 | #define CPU_BASED_TPR_SHADOW 0x00200000 |
38 | #define CPU_BASED_MOV_DR_EXITING 0x00800000 | 38 | #define CPU_BASED_MOV_DR_EXITING 0x00800000 |
39 | #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 | 39 | #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 |
40 | #define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000 | 40 | #define CPU_BASED_USE_IO_BITMAPS 0x02000000 |
41 | #define CPU_BASED_MSR_BITMAPS 0x10000000 | 41 | #define CPU_BASED_USE_MSR_BITMAPS 0x10000000 |
42 | #define CPU_BASED_MONITOR_EXITING 0x20000000 | 42 | #define CPU_BASED_MONITOR_EXITING 0x20000000 |
43 | #define CPU_BASED_PAUSE_EXITING 0x40000000 | 43 | #define CPU_BASED_PAUSE_EXITING 0x40000000 |
44 | #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 | ||
44 | 45 | ||
45 | #define PIN_BASED_EXT_INTR_MASK 0x1 | 46 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
46 | #define PIN_BASED_NMI_EXITING 0x8 | 47 | #define PIN_BASED_NMI_EXITING 0x00000008 |
48 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 | ||
47 | 49 | ||
48 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 | 50 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 |
49 | #define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200 | 51 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 |
50 | 52 | ||
53 | #define VM_ENTRY_IA32E_MODE 0x00000200 | ||
54 | #define VM_ENTRY_SMM 0x00000400 | ||
55 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 | ||
56 | |||
57 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | ||
51 | 58 | ||
52 | /* VMCS Encodings */ | 59 | /* VMCS Encodings */ |
53 | enum vmcs_field { | 60 | enum vmcs_field { |
@@ -206,6 +213,7 @@ enum vmcs_field { | |||
206 | #define EXIT_REASON_MSR_READ 31 | 213 | #define EXIT_REASON_MSR_READ 31 |
207 | #define EXIT_REASON_MSR_WRITE 32 | 214 | #define EXIT_REASON_MSR_WRITE 32 |
208 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 215 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
216 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | ||
209 | 217 | ||
210 | /* | 218 | /* |
211 | * Interruption-information format | 219 | * Interruption-information format |
@@ -261,9 +269,6 @@ enum vmcs_field { | |||
261 | /* segment AR */ | 269 | /* segment AR */ |
262 | #define SEGMENT_AR_L_MASK (1 << 13) | 270 | #define SEGMENT_AR_L_MASK (1 << 13) |
263 | 271 | ||
264 | /* entry controls */ | ||
265 | #define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9) | ||
266 | |||
267 | #define AR_TYPE_ACCESSES_MASK 1 | 272 | #define AR_TYPE_ACCESSES_MASK 1 |
268 | #define AR_TYPE_READABLE_MASK (1 << 1) | 273 | #define AR_TYPE_READABLE_MASK (1 << 1) |
269 | #define AR_TYPE_WRITEABLE_MASK (1 << 2) | 274 | #define AR_TYPE_WRITEABLE_MASK (1 << 2) |
@@ -285,13 +290,21 @@ enum vmcs_field { | |||
285 | 290 | ||
286 | #define AR_RESERVD_MASK 0xfffe0f00 | 291 | #define AR_RESERVD_MASK 0xfffe0f00 |
287 | 292 | ||
288 | #define CR4_VMXE 0x2000 | 293 | #define MSR_IA32_VMX_BASIC 0x480 |
294 | #define MSR_IA32_VMX_PINBASED_CTLS 0x481 | ||
295 | #define MSR_IA32_VMX_PROCBASED_CTLS 0x482 | ||
296 | #define MSR_IA32_VMX_EXIT_CTLS 0x483 | ||
297 | #define MSR_IA32_VMX_ENTRY_CTLS 0x484 | ||
298 | #define MSR_IA32_VMX_MISC 0x485 | ||
299 | #define MSR_IA32_VMX_CR0_FIXED0 0x486 | ||
300 | #define MSR_IA32_VMX_CR0_FIXED1 0x487 | ||
301 | #define MSR_IA32_VMX_CR4_FIXED0 0x488 | ||
302 | #define MSR_IA32_VMX_CR4_FIXED1 0x489 | ||
303 | #define MSR_IA32_VMX_VMCS_ENUM 0x48a | ||
304 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b | ||
289 | 305 | ||
290 | #define MSR_IA32_VMX_BASIC 0x480 | 306 | #define MSR_IA32_FEATURE_CONTROL 0x3a |
291 | #define MSR_IA32_FEATURE_CONTROL 0x03a | 307 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 |
292 | #define MSR_IA32_VMX_PINBASED_CTLS 0x481 | 308 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 |
293 | #define MSR_IA32_VMX_PROCBASED_CTLS 0x482 | ||
294 | #define MSR_IA32_VMX_EXIT_CTLS 0x483 | ||
295 | #define MSR_IA32_VMX_ENTRY_CTLS 0x484 | ||
296 | 309 | ||
297 | #endif | 310 | #endif |
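The MSR_IA32_VMX_* definitions added above are the VMX capability MSRs: for each control field, the low 32 bits report bits that must be 1 and the high 32 bits report bits that may be 1. A common way to consume them is the pattern below; adjust_vmx_controls() is a hypothetical helper shown for illustration, not code from this patch.

	static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
					      u32 msr, u32 *result)
	{
		u32 vmx_msr_low, vmx_msr_high;
		u32 ctl = ctl_min | ctl_opt;

		rdmsr(msr, vmx_msr_low, vmx_msr_high);

		ctl &= vmx_msr_high;	/* bit == 0 in high word ==> must be zero */
		ctl |= vmx_msr_low;	/* bit == 1 in low word  ==> must be one  */

		/* All required (minimum) control bits must be supported. */
		if (ctl_min & ~ctl)
			return -EIO;

		*result = ctl;
		return 0;
	}

For example, a caller could request CPU_BASED_HLT_EXITING as mandatory and CPU_BASED_TPR_SHADOW as optional against MSR_IA32_VMX_PROCBASED_CTLS and use whatever the hardware grants.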
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 4b8a0cc9665e..9737c3b2f48c 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * Copyright (c) 2005 Keir Fraser | 6 | * Copyright (c) 2005 Keir Fraser |
7 | * | 7 | * |
8 | * Linux coding style, mod r/m decoder, segment base fixes, real-mode | 8 | * Linux coding style, mod r/m decoder, segment base fixes, real-mode |
9 | * privieged instructions: | 9 | * privileged instructions: |
10 | * | 10 | * |
11 | * Copyright (C) 2006 Qumranet | 11 | * Copyright (C) 2006 Qumranet |
12 | * | 12 | * |
@@ -83,7 +83,7 @@ static u8 opcode_table[256] = { | |||
83 | /* 0x20 - 0x27 */ | 83 | /* 0x20 - 0x27 */ |
84 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 84 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
85 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 85 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
86 | 0, 0, 0, 0, | 86 | SrcImmByte, SrcImm, 0, 0, |
87 | /* 0x28 - 0x2F */ | 87 | /* 0x28 - 0x2F */ |
88 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 88 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
89 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 89 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -99,15 +99,24 @@ static u8 opcode_table[256] = { | |||
99 | /* 0x40 - 0x4F */ | 99 | /* 0x40 - 0x4F */ |
100 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 100 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
101 | /* 0x50 - 0x57 */ | 101 | /* 0x50 - 0x57 */ |
102 | 0, 0, 0, 0, 0, 0, 0, 0, | 102 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, |
103 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
103 | /* 0x58 - 0x5F */ | 104 | /* 0x58 - 0x5F */ |
104 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 105 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, |
105 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 106 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, |
106 | /* 0x60 - 0x6F */ | 107 | /* 0x60 - 0x67 */ |
107 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | 108 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , |
108 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 109 | 0, 0, 0, 0, |
109 | /* 0x70 - 0x7F */ | 110 | /* 0x68 - 0x6F */ |
110 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 111 | 0, 0, ImplicitOps|Mov, 0, |
112 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | ||
113 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | ||
114 | /* 0x70 - 0x77 */ | ||
115 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
116 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
117 | /* 0x78 - 0x7F */ | ||
118 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
119 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
111 | /* 0x80 - 0x87 */ | 120 | /* 0x80 - 0x87 */ |
112 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 121 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, |
113 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, | 122 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, |
@@ -116,9 +125,9 @@ static u8 opcode_table[256] = { | |||
116 | /* 0x88 - 0x8F */ | 125 | /* 0x88 - 0x8F */ |
117 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 126 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
118 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 127 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
119 | 0, 0, 0, DstMem | SrcNone | ModRM | Mov, | 128 | 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov, |
120 | /* 0x90 - 0x9F */ | 129 | /* 0x90 - 0x9F */ |
121 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 130 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0, |
122 | /* 0xA0 - 0xA7 */ | 131 | /* 0xA0 - 0xA7 */ |
123 | ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov, | 132 | ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov, |
124 | ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov, | 133 | ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov, |
@@ -142,8 +151,10 @@ static u8 opcode_table[256] = { | |||
142 | 0, 0, 0, 0, | 151 | 0, 0, 0, 0, |
143 | /* 0xD8 - 0xDF */ | 152 | /* 0xD8 - 0xDF */ |
144 | 0, 0, 0, 0, 0, 0, 0, 0, | 153 | 0, 0, 0, 0, 0, 0, 0, 0, |
145 | /* 0xE0 - 0xEF */ | 154 | /* 0xE0 - 0xE7 */ |
146 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 155 | 0, 0, 0, 0, 0, 0, 0, 0, |
156 | /* 0xE8 - 0xEF */ | ||
157 | ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0, | ||
147 | /* 0xF0 - 0xF7 */ | 158 | /* 0xF0 - 0xF7 */ |
148 | 0, 0, 0, 0, | 159 | 0, 0, 0, 0, |
149 | ImplicitOps, 0, | 160 | ImplicitOps, 0, |
@@ -181,7 +192,10 @@ static u16 twobyte_table[256] = { | |||
181 | /* 0x70 - 0x7F */ | 192 | /* 0x70 - 0x7F */ |
182 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 193 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
183 | /* 0x80 - 0x8F */ | 194 | /* 0x80 - 0x8F */ |
184 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 195 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, |
196 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
197 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
198 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
185 | /* 0x90 - 0x9F */ | 199 | /* 0x90 - 0x9F */ |
186 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 200 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
187 | /* 0xA0 - 0xA7 */ | 201 | /* 0xA0 - 0xA7 */ |
@@ -207,19 +221,6 @@ static u16 twobyte_table[256] = { | |||
207 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | 221 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
208 | }; | 222 | }; |
209 | 223 | ||
210 | /* | ||
211 | * Tell the emulator that of the Group 7 instructions (sgdt, lidt, etc.) we | ||
212 | * are interested only in invlpg and not in any of the rest. | ||
213 | * | ||
214 | * invlpg is a special instruction in that the data it references may not | ||
215 | * be mapped. | ||
216 | */ | ||
217 | void kvm_emulator_want_group7_invlpg(void) | ||
218 | { | ||
219 | twobyte_table[1] &= ~SrcMem; | ||
220 | } | ||
221 | EXPORT_SYMBOL_GPL(kvm_emulator_want_group7_invlpg); | ||
222 | |||
223 | /* Type, address-of, and value of an instruction's operand. */ | 224 | /* Type, address-of, and value of an instruction's operand. */ |
224 | struct operand { | 225 | struct operand { |
225 | enum { OP_REG, OP_MEM, OP_IMM } type; | 226 | enum { OP_REG, OP_MEM, OP_IMM } type; |
@@ -420,7 +421,7 @@ struct operand { | |||
420 | #define insn_fetch(_type, _size, _eip) \ | 421 | #define insn_fetch(_type, _size, _eip) \ |
421 | ({ unsigned long _x; \ | 422 | ({ unsigned long _x; \ |
422 | rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \ | 423 | rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \ |
423 | (_size), ctxt); \ | 424 | (_size), ctxt->vcpu); \ |
424 | if ( rc != 0 ) \ | 425 | if ( rc != 0 ) \ |
425 | goto done; \ | 426 | goto done; \ |
426 | (_eip) += (_size); \ | 427 | (_eip) += (_size); \ |
@@ -428,10 +429,11 @@ struct operand { | |||
428 | }) | 429 | }) |
429 | 430 | ||
430 | /* Access/update address held in a register, based on addressing mode. */ | 431 | /* Access/update address held in a register, based on addressing mode. */ |
432 | #define address_mask(reg) \ | ||
433 | ((ad_bytes == sizeof(unsigned long)) ? \ | ||
434 | (reg) : ((reg) & ((1UL << (ad_bytes << 3)) - 1))) | ||
431 | #define register_address(base, reg) \ | 435 | #define register_address(base, reg) \ |
432 | ((base) + ((ad_bytes == sizeof(unsigned long)) ? (reg) : \ | 436 | ((base) + address_mask(reg)) |
433 | ((reg) & ((1UL << (ad_bytes << 3)) - 1)))) | ||
434 | |||
435 | #define register_address_increment(reg, inc) \ | 437 | #define register_address_increment(reg, inc) \ |
436 | do { \ | 438 | do { \ |
437 | /* signed type ensures sign extension to long */ \ | 439 | /* signed type ensures sign extension to long */ \ |
@@ -443,8 +445,19 @@ struct operand { | |||
443 | (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ | 445 | (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ |
444 | } while (0) | 446 | } while (0) |
445 | 447 | ||
446 | void *decode_register(u8 modrm_reg, unsigned long *regs, | 448 | #define JMP_REL(rel) \ |
447 | int highbyte_regs) | 449 | do { \ |
450 | _eip += (int)(rel); \ | ||
451 | _eip = ((op_bytes == 2) ? (uint16_t)_eip : (uint32_t)_eip); \ | ||
452 | } while (0) | ||
453 | |||
454 | /* | ||
455 | * Given the 'reg' portion of a ModRM byte, and a register block, return a | ||
456 | * pointer into the block that addresses the relevant register. | ||
457 | * @highbyte_regs specifies whether to decode AH,CH,DH,BH. | ||
458 | */ | ||
459 | static void *decode_register(u8 modrm_reg, unsigned long *regs, | ||
460 | int highbyte_regs) | ||
448 | { | 461 | { |
449 | void *p; | 462 | void *p; |
450 | 463 | ||
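Both address_mask() and JMP_REL() above truncate to the current address or operand size so 16- and 32-bit guests wrap correctly. A standalone illustration of the same arithmetic (hypothetical helper, not part of the emulator):

	static unsigned long mask_to_bytes(unsigned long val, int n_bytes)
	{
		if (n_bytes == sizeof(unsigned long))
			return val;
		return val & ((1UL << (n_bytes << 3)) - 1);
	}

	/*
	 * mask_to_bytes(0x12345678, 2) == 0x5678: with ad_bytes == 2 an
	 * effective address wraps at 64K.  JMP_REL() applies the same idea
	 * to _eip after adding the sign-extended relative displacement.
	 */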
@@ -464,13 +477,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
464 | if (op_bytes == 2) | 477 | if (op_bytes == 2) |
465 | op_bytes = 3; | 478 | op_bytes = 3; |
466 | *address = 0; | 479 | *address = 0; |
467 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt); | 480 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
481 | ctxt->vcpu); | ||
468 | if (rc) | 482 | if (rc) |
469 | return rc; | 483 | return rc; |
470 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt); | 484 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
485 | ctxt->vcpu); | ||
471 | return rc; | 486 | return rc; |
472 | } | 487 | } |
473 | 488 | ||
489 | static int test_cc(unsigned int condition, unsigned int flags) | ||
490 | { | ||
491 | int rc = 0; | ||
492 | |||
493 | switch ((condition & 15) >> 1) { | ||
494 | case 0: /* o */ | ||
495 | rc |= (flags & EFLG_OF); | ||
496 | break; | ||
497 | case 1: /* b/c/nae */ | ||
498 | rc |= (flags & EFLG_CF); | ||
499 | break; | ||
500 | case 2: /* z/e */ | ||
501 | rc |= (flags & EFLG_ZF); | ||
502 | break; | ||
503 | case 3: /* be/na */ | ||
504 | rc |= (flags & (EFLG_CF|EFLG_ZF)); | ||
505 | break; | ||
506 | case 4: /* s */ | ||
507 | rc |= (flags & EFLG_SF); | ||
508 | break; | ||
509 | case 5: /* p/pe */ | ||
510 | rc |= (flags & EFLG_PF); | ||
511 | break; | ||
512 | case 7: /* le/ng */ | ||
513 | rc |= (flags & EFLG_ZF); | ||
514 | /* fall through */ | ||
515 | case 6: /* l/nge */ | ||
516 | rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); | ||
517 | break; | ||
518 | } | ||
519 | |||
520 | /* Odd condition identifiers (lsb == 1) have inverted sense. */ | ||
521 | return (!!rc ^ (condition & 1)); | ||
522 | } | ||
523 | |||
474 | int | 524 | int |
475 | x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 525 | x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
476 | { | 526 | { |
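test_cc() above centralises the condition decoding shared by the Jcc handlers (opcodes 0x70-0x7f and 0x0f 0x80-0x8f): bits 3:1 of the opcode select the flag combination and bit 0 inverts the sense. A hypothetical caller, for illustration:

	/* Illustration only: 0x74 is jz/je, 0x75 is jnz/jne. */
	static int jcc_taken(u8 opcode, unsigned long eflags)
	{
		/* 0x74 -> taken when ZF set; 0x75 -> low bit inverts the sense */
		return test_cc(opcode, eflags);
	}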
@@ -771,11 +821,15 @@ done_prefixes: | |||
771 | goto srcmem_common; | 821 | goto srcmem_common; |
772 | case SrcMem: | 822 | case SrcMem: |
773 | src.bytes = (d & ByteOp) ? 1 : op_bytes; | 823 | src.bytes = (d & ByteOp) ? 1 : op_bytes; |
824 | /* Don't fetch the address for invlpg: it could be unmapped. */ | ||
825 | if (twobyte && b == 0x01 && modrm_reg == 7) | ||
826 | break; | ||
774 | srcmem_common: | 827 | srcmem_common: |
775 | src.type = OP_MEM; | 828 | src.type = OP_MEM; |
776 | src.ptr = (unsigned long *)cr2; | 829 | src.ptr = (unsigned long *)cr2; |
830 | src.val = 0; | ||
777 | if ((rc = ops->read_emulated((unsigned long)src.ptr, | 831 | if ((rc = ops->read_emulated((unsigned long)src.ptr, |
778 | &src.val, src.bytes, ctxt)) != 0) | 832 | &src.val, src.bytes, ctxt->vcpu)) != 0) |
779 | goto done; | 833 | goto done; |
780 | src.orig_val = src.val; | 834 | src.orig_val = src.val; |
781 | break; | 835 | break; |
@@ -814,7 +868,7 @@ done_prefixes: | |||
814 | case DstReg: | 868 | case DstReg: |
815 | dst.type = OP_REG; | 869 | dst.type = OP_REG; |
816 | if ((d & ByteOp) | 870 | if ((d & ByteOp) |
817 | && !(twobyte_table && (b == 0xb6 || b == 0xb7))) { | 871 | && !(twobyte && (b == 0xb6 || b == 0xb7))) { |
818 | dst.ptr = decode_register(modrm_reg, _regs, | 872 | dst.ptr = decode_register(modrm_reg, _regs, |
819 | (rex_prefix == 0)); | 873 | (rex_prefix == 0)); |
820 | dst.val = *(u8 *) dst.ptr; | 874 | dst.val = *(u8 *) dst.ptr; |
@@ -838,6 +892,7 @@ done_prefixes: | |||
838 | dst.type = OP_MEM; | 892 | dst.type = OP_MEM; |
839 | dst.ptr = (unsigned long *)cr2; | 893 | dst.ptr = (unsigned long *)cr2; |
840 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; | 894 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; |
895 | dst.val = 0; | ||
841 | if (d & BitOp) { | 896 | if (d & BitOp) { |
842 | unsigned long mask = ~(dst.bytes * 8 - 1); | 897 | unsigned long mask = ~(dst.bytes * 8 - 1); |
843 | 898 | ||
@@ -845,7 +900,7 @@ done_prefixes: | |||
845 | } | 900 | } |
846 | if (!(d & Mov) && /* optimisation - avoid slow emulated read */ | 901 | if (!(d & Mov) && /* optimisation - avoid slow emulated read */ |
847 | ((rc = ops->read_emulated((unsigned long)dst.ptr, | 902 | ((rc = ops->read_emulated((unsigned long)dst.ptr, |
848 | &dst.val, dst.bytes, ctxt)) != 0)) | 903 | &dst.val, dst.bytes, ctxt->vcpu)) != 0)) |
849 | goto done; | 904 | goto done; |
850 | break; | 905 | break; |
851 | } | 906 | } |
@@ -871,10 +926,27 @@ done_prefixes: | |||
871 | sbb: /* sbb */ | 926 | sbb: /* sbb */ |
872 | emulate_2op_SrcV("sbb", src, dst, _eflags); | 927 | emulate_2op_SrcV("sbb", src, dst, _eflags); |
873 | break; | 928 | break; |
874 | case 0x20 ... 0x25: | 929 | case 0x20 ... 0x23: |
875 | and: /* and */ | 930 | and: /* and */ |
876 | emulate_2op_SrcV("and", src, dst, _eflags); | 931 | emulate_2op_SrcV("and", src, dst, _eflags); |
877 | break; | 932 | break; |
933 | case 0x24: /* and al imm8 */ | ||
934 | dst.type = OP_REG; | ||
935 | dst.ptr = &_regs[VCPU_REGS_RAX]; | ||
936 | dst.val = *(u8 *)dst.ptr; | ||
937 | dst.bytes = 1; | ||
938 | dst.orig_val = dst.val; | ||
939 | goto and; | ||
940 | case 0x25: /* and ax imm16, or eax imm32 */ | ||
941 | dst.type = OP_REG; | ||
942 | dst.bytes = op_bytes; | ||
943 | dst.ptr = &_regs[VCPU_REGS_RAX]; | ||
944 | if (op_bytes == 2) | ||
945 | dst.val = *(u16 *)dst.ptr; | ||
946 | else | ||
947 | dst.val = *(u32 *)dst.ptr; | ||
948 | dst.orig_val = dst.val; | ||
949 | goto and; | ||
878 | case 0x28 ... 0x2d: | 950 | case 0x28 ... 0x2d: |
879 | sub: /* sub */ | 951 | sub: /* sub */ |
880 | emulate_2op_SrcV("sub", src, dst, _eflags); | 952 | emulate_2op_SrcV("sub", src, dst, _eflags); |
@@ -892,6 +964,17 @@ done_prefixes: | |||
892 | goto cannot_emulate; | 964 | goto cannot_emulate; |
893 | dst.val = (s32) src.val; | 965 | dst.val = (s32) src.val; |
894 | break; | 966 | break; |
967 | case 0x6a: /* push imm8 */ | ||
968 | src.val = 0L; | ||
969 | src.val = insn_fetch(s8, 1, _eip); | ||
970 | push: | ||
971 | dst.type = OP_MEM; | ||
972 | dst.bytes = op_bytes; | ||
973 | dst.val = src.val; | ||
974 | register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes); | ||
975 | dst.ptr = (void *) register_address(ctxt->ss_base, | ||
976 | _regs[VCPU_REGS_RSP]); | ||
977 | break; | ||
895 | case 0x80 ... 0x83: /* Grp1 */ | 978 | case 0x80 ... 0x83: /* Grp1 */ |
896 | switch (modrm_reg) { | 979 | switch (modrm_reg) { |
897 | case 0: | 980 | case 0: |
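The new 0x6a handler and the shared push: label above emulate a stack push by pre-decrementing RSP and turning the destination into an OP_MEM write at ss:rsp, so the normal writeback stage performs the store. A simplified sketch of that sequence (hypothetical helper name, address-size masking omitted):

	static void setup_push(struct operand *dst, unsigned long *rsp,
			       unsigned long ss_base, unsigned long val,
			       int op_bytes)
	{
		*rsp -= op_bytes;		/* pre-decrement stack pointer */
		dst->type  = OP_MEM;		/* writeback stage does the store */
		dst->bytes = op_bytes;
		dst->val   = val;
		dst->ptr   = (void *)(ss_base + *rsp);
	}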
@@ -939,18 +1022,10 @@ done_prefixes: | |||
939 | dst.val = src.val; | 1022 | dst.val = src.val; |
940 | lock_prefix = 1; | 1023 | lock_prefix = 1; |
941 | break; | 1024 | break; |
942 | case 0xa0 ... 0xa1: /* mov */ | ||
943 | dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; | ||
944 | dst.val = src.val; | ||
945 | _eip += ad_bytes; /* skip src displacement */ | ||
946 | break; | ||
947 | case 0xa2 ... 0xa3: /* mov */ | ||
948 | dst.val = (unsigned long)_regs[VCPU_REGS_RAX]; | ||
949 | _eip += ad_bytes; /* skip dst displacement */ | ||
950 | break; | ||
951 | case 0x88 ... 0x8b: /* mov */ | 1025 | case 0x88 ... 0x8b: /* mov */ |
952 | case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ | 1026 | goto mov; |
953 | dst.val = src.val; | 1027 | case 0x8d: /* lea r16/r32, m */ |
1028 | dst.val = modrm_val; | ||
954 | break; | 1029 | break; |
955 | case 0x8f: /* pop (sole member of Grp1a) */ | 1030 | case 0x8f: /* pop (sole member of Grp1a) */ |
956 | /* 64-bit mode: POP always pops a 64-bit operand. */ | 1031 | /* 64-bit mode: POP always pops a 64-bit operand. */ |
@@ -958,10 +1033,19 @@ done_prefixes: | |||
958 | dst.bytes = 8; | 1033 | dst.bytes = 8; |
959 | if ((rc = ops->read_std(register_address(ctxt->ss_base, | 1034 | if ((rc = ops->read_std(register_address(ctxt->ss_base, |
960 | _regs[VCPU_REGS_RSP]), | 1035 | _regs[VCPU_REGS_RSP]), |
961 | &dst.val, dst.bytes, ctxt)) != 0) | 1036 | &dst.val, dst.bytes, ctxt->vcpu)) != 0) |
962 | goto done; | 1037 | goto done; |
963 | register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes); | 1038 | register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes); |
964 | break; | 1039 | break; |
1040 | case 0xa0 ... 0xa1: /* mov */ | ||
1041 | dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; | ||
1042 | dst.val = src.val; | ||
1043 | _eip += ad_bytes; /* skip src displacement */ | ||
1044 | break; | ||
1045 | case 0xa2 ... 0xa3: /* mov */ | ||
1046 | dst.val = (unsigned long)_regs[VCPU_REGS_RAX]; | ||
1047 | _eip += ad_bytes; /* skip dst displacement */ | ||
1048 | break; | ||
965 | case 0xc0 ... 0xc1: | 1049 | case 0xc0 ... 0xc1: |
966 | grp2: /* Grp2 */ | 1050 | grp2: /* Grp2 */ |
967 | switch (modrm_reg) { | 1051 | switch (modrm_reg) { |
@@ -989,12 +1073,41 @@ done_prefixes: | |||
989 | break; | 1073 | break; |
990 | } | 1074 | } |
991 | break; | 1075 | break; |
1076 | case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ | ||
1077 | mov: | ||
1078 | dst.val = src.val; | ||
1079 | break; | ||
992 | case 0xd0 ... 0xd1: /* Grp2 */ | 1080 | case 0xd0 ... 0xd1: /* Grp2 */ |
993 | src.val = 1; | 1081 | src.val = 1; |
994 | goto grp2; | 1082 | goto grp2; |
995 | case 0xd2 ... 0xd3: /* Grp2 */ | 1083 | case 0xd2 ... 0xd3: /* Grp2 */ |
996 | src.val = _regs[VCPU_REGS_RCX]; | 1084 | src.val = _regs[VCPU_REGS_RCX]; |
997 | goto grp2; | 1085 | goto grp2; |
1086 | case 0xe8: /* call (near) */ { | ||
1087 | long int rel; | ||
1088 | switch (op_bytes) { | ||
1089 | case 2: | ||
1090 | rel = insn_fetch(s16, 2, _eip); | ||
1091 | break; | ||
1092 | case 4: | ||
1093 | rel = insn_fetch(s32, 4, _eip); | ||
1094 | break; | ||
1095 | case 8: | ||
1096 | rel = insn_fetch(s64, 8, _eip); | ||
1097 | break; | ||
1098 | default: | ||
1099 | DPRINTF("Call: Invalid op_bytes\n"); | ||
1100 | goto cannot_emulate; | ||
1101 | } | ||
1102 | src.val = (unsigned long) _eip; | ||
1103 | JMP_REL(rel); | ||
1104 | goto push; | ||
1105 | } | ||
1106 | case 0xe9: /* jmp rel */ | ||
1107 | case 0xeb: /* jmp rel short */ | ||
1108 | JMP_REL(src.val); | ||
1109 | no_wb = 1; /* Disable writeback. */ | ||
1110 | break; | ||
998 | case 0xf6 ... 0xf7: /* Grp3 */ | 1111 | case 0xf6 ... 0xf7: /* Grp3 */ |
999 | switch (modrm_reg) { | 1112 | switch (modrm_reg) { |
1000 | case 0 ... 1: /* test */ | 1113 | case 0 ... 1: /* test */ |
@@ -1037,13 +1150,19 @@ done_prefixes: | |||
1037 | case 1: /* dec */ | 1150 | case 1: /* dec */ |
1038 | emulate_1op("dec", dst, _eflags); | 1151 | emulate_1op("dec", dst, _eflags); |
1039 | break; | 1152 | break; |
1153 | case 4: /* jmp abs */ | ||
1154 | if (b == 0xff) | ||
1155 | _eip = dst.val; | ||
1156 | else | ||
1157 | goto cannot_emulate; | ||
1158 | break; | ||
1040 | case 6: /* push */ | 1159 | case 6: /* push */ |
1041 | /* 64-bit mode: PUSH always pushes a 64-bit operand. */ | 1160 | /* 64-bit mode: PUSH always pushes a 64-bit operand. */ |
1042 | if (mode == X86EMUL_MODE_PROT64) { | 1161 | if (mode == X86EMUL_MODE_PROT64) { |
1043 | dst.bytes = 8; | 1162 | dst.bytes = 8; |
1044 | if ((rc = ops->read_std((unsigned long)dst.ptr, | 1163 | if ((rc = ops->read_std((unsigned long)dst.ptr, |
1045 | &dst.val, 8, | 1164 | &dst.val, 8, |
1046 | ctxt)) != 0) | 1165 | ctxt->vcpu)) != 0) |
1047 | goto done; | 1166 | goto done; |
1048 | } | 1167 | } |
1049 | register_address_increment(_regs[VCPU_REGS_RSP], | 1168 | register_address_increment(_regs[VCPU_REGS_RSP], |
@@ -1051,7 +1170,7 @@ done_prefixes: | |||
1051 | if ((rc = ops->write_std( | 1170 | if ((rc = ops->write_std( |
1052 | register_address(ctxt->ss_base, | 1171 | register_address(ctxt->ss_base, |
1053 | _regs[VCPU_REGS_RSP]), | 1172 | _regs[VCPU_REGS_RSP]), |
1054 | &dst.val, dst.bytes, ctxt)) != 0) | 1173 | &dst.val, dst.bytes, ctxt->vcpu)) != 0) |
1055 | goto done; | 1174 | goto done; |
1056 | no_wb = 1; | 1175 | no_wb = 1; |
1057 | break; | 1176 | break; |
@@ -1086,11 +1205,11 @@ writeback: | |||
1086 | rc = ops->cmpxchg_emulated((unsigned long)dst. | 1205 | rc = ops->cmpxchg_emulated((unsigned long)dst. |
1087 | ptr, &dst.orig_val, | 1206 | ptr, &dst.orig_val, |
1088 | &dst.val, dst.bytes, | 1207 | &dst.val, dst.bytes, |
1089 | ctxt); | 1208 | ctxt->vcpu); |
1090 | else | 1209 | else |
1091 | rc = ops->write_emulated((unsigned long)dst.ptr, | 1210 | rc = ops->write_emulated((unsigned long)dst.ptr, |
1092 | &dst.val, dst.bytes, | 1211 | &dst.val, dst.bytes, |
1093 | ctxt); | 1212 | ctxt->vcpu); |
1094 | if (rc != 0) | 1213 | if (rc != 0) |
1095 | goto done; | 1214 | goto done; |
1096 | default: | 1215 | default: |
@@ -1109,6 +1228,81 @@ done: | |||
1109 | special_insn: | 1228 | special_insn: |
1110 | if (twobyte) | 1229 | if (twobyte) |
1111 | goto twobyte_special_insn; | 1230 | goto twobyte_special_insn; |
1231 | switch(b) { | ||
1232 | case 0x50 ... 0x57: /* push reg */ | ||
1233 | if (op_bytes == 2) | ||
1234 | src.val = (u16) _regs[b & 0x7]; | ||
1235 | else | ||
1236 | src.val = (u32) _regs[b & 0x7]; | ||
1237 | dst.type = OP_MEM; | ||
1238 | dst.bytes = op_bytes; | ||
1239 | dst.val = src.val; | ||
1240 | register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes); | ||
1241 | dst.ptr = (void *) register_address( | ||
1242 | ctxt->ss_base, _regs[VCPU_REGS_RSP]); | ||
1243 | break; | ||
1244 | case 0x58 ... 0x5f: /* pop reg */ | ||
1245 | dst.ptr = (unsigned long *)&_regs[b & 0x7]; | ||
1246 | pop_instruction: | ||
1247 | if ((rc = ops->read_std(register_address(ctxt->ss_base, | ||
1248 | _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu)) | ||
1249 | != 0) | ||
1250 | goto done; | ||
1251 | |||
1252 | register_address_increment(_regs[VCPU_REGS_RSP], op_bytes); | ||
1253 | no_wb = 1; /* Disable writeback. */ | ||
1254 | break; | ||
1255 | case 0x6c: /* insb */ | ||
1256 | case 0x6d: /* insw/insd */ | ||
1257 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | ||
1258 | 1, /* in */ | ||
1259 | (d & ByteOp) ? 1 : op_bytes, /* size */ | ||
1260 | rep_prefix ? | ||
1261 | address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */ | ||
1262 | (_eflags & EFLG_DF), /* down */ | ||
1263 | register_address(ctxt->es_base, | ||
1264 | _regs[VCPU_REGS_RDI]), /* address */ | ||
1265 | rep_prefix, | ||
1266 | _regs[VCPU_REGS_RDX] /* port */ | ||
1267 | ) == 0) | ||
1268 | return -1; | ||
1269 | return 0; | ||
1270 | case 0x6e: /* outsb */ | ||
1271 | case 0x6f: /* outsw/outsd */ | ||
1272 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | ||
1273 | 0, /* in */ | ||
1274 | (d & ByteOp) ? 1 : op_bytes, /* size */ | ||
1275 | rep_prefix ? | ||
1276 | address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */ | ||
1277 | (_eflags & EFLG_DF), /* down */ | ||
1278 | register_address(override_base ? | ||
1279 | *override_base : ctxt->ds_base, | ||
1280 | _regs[VCPU_REGS_RSI]), /* address */ | ||
1281 | rep_prefix, | ||
1282 | _regs[VCPU_REGS_RDX] /* port */ | ||
1283 | ) == 0) | ||
1284 | return -1; | ||
1285 | return 0; | ||
1286 | case 0x70 ... 0x7f: /* jcc (short) */ { | ||
1287 | int rel = insn_fetch(s8, 1, _eip); | ||
1288 | |||
1289 | if (test_cc(b, _eflags)) | ||
1290 | JMP_REL(rel); | ||
1291 | break; | ||
1292 | } | ||
1293 | case 0x9c: /* pushf */ | ||
1294 | src.val = (unsigned long) _eflags; | ||
1295 | goto push; | ||
1296 | case 0x9d: /* popf */ | ||
1297 | dst.ptr = (unsigned long *) &_eflags; | ||
1298 | goto pop_instruction; | ||
1299 | case 0xc3: /* ret */ | ||
1300 | dst.ptr = &_eip; | ||
1301 | goto pop_instruction; | ||
1302 | case 0xf4: /* hlt */ | ||
1303 | ctxt->vcpu->halt_request = 1; | ||
1304 | goto done; | ||
1305 | } | ||
1112 | if (rep_prefix) { | 1306 | if (rep_prefix) { |
1113 | if (_regs[VCPU_REGS_RCX] == 0) { | 1307 | if (_regs[VCPU_REGS_RCX] == 0) { |
1114 | ctxt->vcpu->rip = _eip; | 1308 | ctxt->vcpu->rip = _eip; |
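The ins/outs cases added above hand the entire string operation to kvm_emulate_pio_string() instead of looping one element at a time inside the emulator. A hypothetical wrapper showing the argument mapping for "rep insb" (the wrapper itself is for illustration only; the argument order follows the call in the hunk above):

	static int emulate_rep_insb(struct x86_emulate_ctxt *ctxt,
				    unsigned long *regs, unsigned long eflags)
	{
		return kvm_emulate_pio_string(ctxt->vcpu, NULL,
				1,				/* in */
				1,				/* byte sized */
				regs[VCPU_REGS_RCX],		/* count */
				eflags & EFLG_DF,		/* move backwards? */
				ctxt->es_base + regs[VCPU_REGS_RDI], /* dest */
				1,				/* rep prefix */
				regs[VCPU_REGS_RDX]);		/* port */
	}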
@@ -1125,7 +1319,7 @@ special_insn: | |||
1125 | _regs[VCPU_REGS_RDI]); | 1319 | _regs[VCPU_REGS_RDI]); |
1126 | if ((rc = ops->read_emulated(register_address( | 1320 | if ((rc = ops->read_emulated(register_address( |
1127 | override_base ? *override_base : ctxt->ds_base, | 1321 | override_base ? *override_base : ctxt->ds_base, |
1128 | _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt)) != 0) | 1322 | _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt->vcpu)) != 0) |
1129 | goto done; | 1323 | goto done; |
1130 | register_address_increment(_regs[VCPU_REGS_RSI], | 1324 | register_address_increment(_regs[VCPU_REGS_RSI], |
1131 | (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); | 1325 | (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); |
@@ -1147,7 +1341,8 @@ special_insn: | |||
1147 | dst.type = OP_REG; | 1341 | dst.type = OP_REG; |
1148 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; | 1342 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; |
1149 | dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; | 1343 | dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; |
1150 | if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0) | 1344 | if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, |
1345 | ctxt->vcpu)) != 0) | ||
1151 | goto done; | 1346 | goto done; |
1152 | register_address_increment(_regs[VCPU_REGS_RSI], | 1347 | register_address_increment(_regs[VCPU_REGS_RSI], |
1153 | (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); | 1348 | (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); |
@@ -1155,23 +1350,7 @@ special_insn: | |||
1155 | case 0xae ... 0xaf: /* scas */ | 1350 | case 0xae ... 0xaf: /* scas */ |
1156 | DPRINTF("Urk! I don't handle SCAS.\n"); | 1351 | DPRINTF("Urk! I don't handle SCAS.\n"); |
1157 | goto cannot_emulate; | 1352 | goto cannot_emulate; |
1158 | case 0xf4: /* hlt */ | ||
1159 | ctxt->vcpu->halt_request = 1; | ||
1160 | goto done; | ||
1161 | case 0xc3: /* ret */ | ||
1162 | dst.ptr = &_eip; | ||
1163 | goto pop_instruction; | ||
1164 | case 0x58 ... 0x5f: /* pop reg */ | ||
1165 | dst.ptr = (unsigned long *)&_regs[b & 0x7]; | ||
1166 | 1353 | ||
1167 | pop_instruction: | ||
1168 | if ((rc = ops->read_std(register_address(ctxt->ss_base, | ||
1169 | _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0) | ||
1170 | goto done; | ||
1171 | |||
1172 | register_address_increment(_regs[VCPU_REGS_RSP], op_bytes); | ||
1173 | no_wb = 1; /* Disable writeback. */ | ||
1174 | break; | ||
1175 | } | 1354 | } |
1176 | goto writeback; | 1355 | goto writeback; |
1177 | 1356 | ||
@@ -1230,40 +1409,50 @@ twobyte_insn: | |||
1230 | break; | 1409 | break; |
1231 | case 0x40 ... 0x4f: /* cmov */ | 1410 | case 0x40 ... 0x4f: /* cmov */ |
1232 | dst.val = dst.orig_val = src.val; | 1411 | dst.val = dst.orig_val = src.val; |
1233 | d &= ~Mov; /* default to no move */ | 1412 | no_wb = 1; |
1234 | /* | 1413 | /* |
1235 | * First, assume we're decoding an even cmov opcode | 1414 | * First, assume we're decoding an even cmov opcode |
1236 | * (lsb == 0). | 1415 | * (lsb == 0). |
1237 | */ | 1416 | */ |
1238 | switch ((b & 15) >> 1) { | 1417 | switch ((b & 15) >> 1) { |
1239 | case 0: /* cmovo */ | 1418 | case 0: /* cmovo */ |
1240 | d |= (_eflags & EFLG_OF) ? Mov : 0; | 1419 | no_wb = (_eflags & EFLG_OF) ? 0 : 1; |
1241 | break; | 1420 | break; |
1242 | case 1: /* cmovb/cmovc/cmovnae */ | 1421 | case 1: /* cmovb/cmovc/cmovnae */ |
1243 | d |= (_eflags & EFLG_CF) ? Mov : 0; | 1422 | no_wb = (_eflags & EFLG_CF) ? 0 : 1; |
1244 | break; | 1423 | break; |
1245 | case 2: /* cmovz/cmove */ | 1424 | case 2: /* cmovz/cmove */ |
1246 | d |= (_eflags & EFLG_ZF) ? Mov : 0; | 1425 | no_wb = (_eflags & EFLG_ZF) ? 0 : 1; |
1247 | break; | 1426 | break; |
1248 | case 3: /* cmovbe/cmovna */ | 1427 | case 3: /* cmovbe/cmovna */ |
1249 | d |= (_eflags & (EFLG_CF | EFLG_ZF)) ? Mov : 0; | 1428 | no_wb = (_eflags & (EFLG_CF | EFLG_ZF)) ? 0 : 1; |
1250 | break; | 1429 | break; |
1251 | case 4: /* cmovs */ | 1430 | case 4: /* cmovs */ |
1252 | d |= (_eflags & EFLG_SF) ? Mov : 0; | 1431 | no_wb = (_eflags & EFLG_SF) ? 0 : 1; |
1253 | break; | 1432 | break; |
1254 | case 5: /* cmovp/cmovpe */ | 1433 | case 5: /* cmovp/cmovpe */ |
1255 | d |= (_eflags & EFLG_PF) ? Mov : 0; | 1434 | no_wb = (_eflags & EFLG_PF) ? 0 : 1; |
1256 | break; | 1435 | break; |
1257 | case 7: /* cmovle/cmovng */ | 1436 | case 7: /* cmovle/cmovng */ |
1258 | d |= (_eflags & EFLG_ZF) ? Mov : 0; | 1437 | no_wb = (_eflags & EFLG_ZF) ? 0 : 1; |
1259 | /* fall through */ | 1438 | /* fall through */ |
1260 | case 6: /* cmovl/cmovnge */ | 1439 | case 6: /* cmovl/cmovnge */ |
1261 | d |= (!(_eflags & EFLG_SF) != | 1440 | no_wb &= (!(_eflags & EFLG_SF) != |
1262 | !(_eflags & EFLG_OF)) ? Mov : 0; | 1441 | !(_eflags & EFLG_OF)) ? 0 : 1; |
1263 | break; | 1442 | break; |
1264 | } | 1443 | } |
1265 | /* Odd cmov opcodes (lsb == 1) have inverted sense. */ | 1444 | /* Odd cmov opcodes (lsb == 1) have inverted sense. */ |
1266 | d ^= (b & 1) ? Mov : 0; | 1445 | no_wb ^= b & 1; |
1446 | break; | ||
1447 | case 0xa3: | ||
1448 | bt: /* bt */ | ||
1449 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | ||
1450 | emulate_2op_SrcV_nobyte("bt", src, dst, _eflags); | ||
1451 | break; | ||
1452 | case 0xab: | ||
1453 | bts: /* bts */ | ||
1454 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | ||
1455 | emulate_2op_SrcV_nobyte("bts", src, dst, _eflags); | ||
1267 | break; | 1456 | break; |
1268 | case 0xb0 ... 0xb1: /* cmpxchg */ | 1457 | case 0xb0 ... 0xb1: /* cmpxchg */ |
1269 | /* | 1458 | /* |
@@ -1273,8 +1462,6 @@ twobyte_insn: | |||
1273 | src.orig_val = src.val; | 1462 | src.orig_val = src.val; |
1274 | src.val = _regs[VCPU_REGS_RAX]; | 1463 | src.val = _regs[VCPU_REGS_RAX]; |
1275 | emulate_2op_SrcV("cmp", src, dst, _eflags); | 1464 | emulate_2op_SrcV("cmp", src, dst, _eflags); |
1276 | /* Always write back. The question is: where to? */ | ||
1277 | d |= Mov; | ||
1278 | if (_eflags & EFLG_ZF) { | 1465 | if (_eflags & EFLG_ZF) { |
1279 | /* Success: write back to memory. */ | 1466 | /* Success: write back to memory. */ |
1280 | dst.val = src.orig_val; | 1467 | dst.val = src.orig_val; |
@@ -1284,30 +1471,15 @@ twobyte_insn: | |||
1284 | dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; | 1471 | dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; |
1285 | } | 1472 | } |
1286 | break; | 1473 | break; |
1287 | case 0xa3: | ||
1288 | bt: /* bt */ | ||
1289 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | ||
1290 | emulate_2op_SrcV_nobyte("bt", src, dst, _eflags); | ||
1291 | break; | ||
1292 | case 0xb3: | 1474 | case 0xb3: |
1293 | btr: /* btr */ | 1475 | btr: /* btr */ |
1294 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | 1476 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ |
1295 | emulate_2op_SrcV_nobyte("btr", src, dst, _eflags); | 1477 | emulate_2op_SrcV_nobyte("btr", src, dst, _eflags); |
1296 | break; | 1478 | break; |
1297 | case 0xab: | ||
1298 | bts: /* bts */ | ||
1299 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | ||
1300 | emulate_2op_SrcV_nobyte("bts", src, dst, _eflags); | ||
1301 | break; | ||
1302 | case 0xb6 ... 0xb7: /* movzx */ | 1479 | case 0xb6 ... 0xb7: /* movzx */ |
1303 | dst.bytes = op_bytes; | 1480 | dst.bytes = op_bytes; |
1304 | dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val; | 1481 | dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val; |
1305 | break; | 1482 | break; |
1306 | case 0xbb: | ||
1307 | btc: /* btc */ | ||
1308 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | ||
1309 | emulate_2op_SrcV_nobyte("btc", src, dst, _eflags); | ||
1310 | break; | ||
1311 | case 0xba: /* Grp8 */ | 1483 | case 0xba: /* Grp8 */ |
1312 | switch (modrm_reg & 3) { | 1484 | switch (modrm_reg & 3) { |
1313 | case 0: | 1485 | case 0: |
@@ -1320,6 +1492,11 @@ twobyte_insn: | |||
1320 | goto btc; | 1492 | goto btc; |
1321 | } | 1493 | } |
1322 | break; | 1494 | break; |
1495 | case 0xbb: | ||
1496 | btc: /* btc */ | ||
1497 | src.val &= (dst.bytes << 3) - 1; /* only subword offset */ | ||
1498 | emulate_2op_SrcV_nobyte("btc", src, dst, _eflags); | ||
1499 | break; | ||
1323 | case 0xbe ... 0xbf: /* movsx */ | 1500 | case 0xbe ... 0xbf: /* movsx */ |
1324 | dst.bytes = op_bytes; | 1501 | dst.bytes = op_bytes; |
1325 | dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; | 1502 | dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; |
@@ -1331,14 +1508,14 @@ twobyte_special_insn: | |||
1331 | /* Disable writeback. */ | 1508 | /* Disable writeback. */ |
1332 | no_wb = 1; | 1509 | no_wb = 1; |
1333 | switch (b) { | 1510 | switch (b) { |
1511 | case 0x06: | ||
1512 | emulate_clts(ctxt->vcpu); | ||
1513 | break; | ||
1334 | case 0x09: /* wbinvd */ | 1514 | case 0x09: /* wbinvd */ |
1335 | break; | 1515 | break; |
1336 | case 0x0d: /* GrpP (prefetch) */ | 1516 | case 0x0d: /* GrpP (prefetch) */ |
1337 | case 0x18: /* Grp16 (prefetch/nop) */ | 1517 | case 0x18: /* Grp16 (prefetch/nop) */ |
1338 | break; | 1518 | break; |
1339 | case 0x06: | ||
1340 | emulate_clts(ctxt->vcpu); | ||
1341 | break; | ||
1342 | case 0x20: /* mov cr, reg */ | 1519 | case 0x20: /* mov cr, reg */ |
1343 | if (modrm_mod != 3) | 1520 | if (modrm_mod != 3) |
1344 | goto cannot_emulate; | 1521 | goto cannot_emulate; |
@@ -1355,7 +1532,7 @@ twobyte_special_insn: | |||
1355 | | ((u64)_regs[VCPU_REGS_RDX] << 32); | 1532 | | ((u64)_regs[VCPU_REGS_RDX] << 32); |
1356 | rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data); | 1533 | rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data); |
1357 | if (rc) { | 1534 | if (rc) { |
1358 | kvm_arch_ops->inject_gp(ctxt->vcpu, 0); | 1535 | kvm_x86_ops->inject_gp(ctxt->vcpu, 0); |
1359 | _eip = ctxt->vcpu->rip; | 1536 | _eip = ctxt->vcpu->rip; |
1360 | } | 1537 | } |
1361 | rc = X86EMUL_CONTINUE; | 1538 | rc = X86EMUL_CONTINUE; |
@@ -1364,7 +1541,7 @@ twobyte_special_insn: | |||
1364 | /* rdmsr */ | 1541 | /* rdmsr */ |
1365 | rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data); | 1542 | rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data); |
1366 | if (rc) { | 1543 | if (rc) { |
1367 | kvm_arch_ops->inject_gp(ctxt->vcpu, 0); | 1544 | kvm_x86_ops->inject_gp(ctxt->vcpu, 0); |
1368 | _eip = ctxt->vcpu->rip; | 1545 | _eip = ctxt->vcpu->rip; |
1369 | } else { | 1546 | } else { |
1370 | _regs[VCPU_REGS_RAX] = (u32)msr_data; | 1547 | _regs[VCPU_REGS_RAX] = (u32)msr_data; |
@@ -1372,10 +1549,32 @@ twobyte_special_insn: | |||
1372 | } | 1549 | } |
1373 | rc = X86EMUL_CONTINUE; | 1550 | rc = X86EMUL_CONTINUE; |
1374 | break; | 1551 | break; |
1552 | case 0x80 ... 0x8f: /* jnz rel, etc*/ { | ||
1553 | long int rel; | ||
1554 | |||
1555 | switch (op_bytes) { | ||
1556 | case 2: | ||
1557 | rel = insn_fetch(s16, 2, _eip); | ||
1558 | break; | ||
1559 | case 4: | ||
1560 | rel = insn_fetch(s32, 4, _eip); | ||
1561 | break; | ||
1562 | case 8: | ||
1563 | rel = insn_fetch(s64, 8, _eip); | ||
1564 | break; | ||
1565 | default: | ||
1566 | DPRINTF("jnz: Invalid op_bytes\n"); | ||
1567 | goto cannot_emulate; | ||
1568 | } | ||
1569 | if (test_cc(b, _eflags)) | ||
1570 | JMP_REL(rel); | ||
1571 | break; | ||
1572 | } | ||
1375 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 1573 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
1376 | { | 1574 | { |
1377 | u64 old, new; | 1575 | u64 old, new; |
1378 | if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) | 1576 | if ((rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu)) |
1577 | != 0) | ||
1379 | goto done; | 1578 | goto done; |
1380 | if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || | 1579 | if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || |
1381 | ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) { | 1580 | ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) { |
@@ -1386,7 +1585,7 @@ twobyte_special_insn: | |||
1386 | new = ((u64)_regs[VCPU_REGS_RCX] << 32) | 1585 | new = ((u64)_regs[VCPU_REGS_RCX] << 32) |
1387 | | (u32) _regs[VCPU_REGS_RBX]; | 1586 | | (u32) _regs[VCPU_REGS_RBX]; |
1388 | if ((rc = ops->cmpxchg_emulated(cr2, &old, | 1587 | if ((rc = ops->cmpxchg_emulated(cr2, &old, |
1389 | &new, 8, ctxt)) != 0) | 1588 | &new, 8, ctxt->vcpu)) != 0) |
1390 | goto done; | 1589 | goto done; |
1391 | _eflags |= EFLG_ZF; | 1590 | _eflags |= EFLG_ZF; |
1392 | } | 1591 | } |
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h index ea3407d7feee..92c73aa7f9ac 100644 --- a/drivers/kvm/x86_emulate.h +++ b/drivers/kvm/x86_emulate.h | |||
@@ -60,7 +60,7 @@ struct x86_emulate_ops { | |||
60 | * @bytes: [IN ] Number of bytes to read from memory. | 60 | * @bytes: [IN ] Number of bytes to read from memory. |
61 | */ | 61 | */ |
62 | int (*read_std)(unsigned long addr, void *val, | 62 | int (*read_std)(unsigned long addr, void *val, |
63 | unsigned int bytes, struct x86_emulate_ctxt * ctxt); | 63 | unsigned int bytes, struct kvm_vcpu *vcpu); |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * write_std: Write bytes of standard (non-emulated/special) memory. | 66 | * write_std: Write bytes of standard (non-emulated/special) memory. |
@@ -71,7 +71,7 @@ struct x86_emulate_ops { | |||
71 | * @bytes: [IN ] Number of bytes to write to memory. | 71 | * @bytes: [IN ] Number of bytes to write to memory. |
72 | */ | 72 | */ |
73 | int (*write_std)(unsigned long addr, const void *val, | 73 | int (*write_std)(unsigned long addr, const void *val, |
74 | unsigned int bytes, struct x86_emulate_ctxt * ctxt); | 74 | unsigned int bytes, struct kvm_vcpu *vcpu); |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * read_emulated: Read bytes from emulated/special memory area. | 77 | * read_emulated: Read bytes from emulated/special memory area. |
@@ -82,7 +82,7 @@ struct x86_emulate_ops { | |||
82 | int (*read_emulated) (unsigned long addr, | 82 | int (*read_emulated) (unsigned long addr, |
83 | void *val, | 83 | void *val, |
84 | unsigned int bytes, | 84 | unsigned int bytes, |
85 | struct x86_emulate_ctxt * ctxt); | 85 | struct kvm_vcpu *vcpu); |
86 | 86 | ||
87 | /* | 87 | /* |
88 | * write_emulated: Read bytes from emulated/special memory area. | 88 | * write_emulated: Read bytes from emulated/special memory area. |
@@ -94,7 +94,7 @@ struct x86_emulate_ops { | |||
94 | int (*write_emulated) (unsigned long addr, | 94 | int (*write_emulated) (unsigned long addr, |
95 | const void *val, | 95 | const void *val, |
96 | unsigned int bytes, | 96 | unsigned int bytes, |
97 | struct x86_emulate_ctxt * ctxt); | 97 | struct kvm_vcpu *vcpu); |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an | 100 | * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an |
@@ -108,12 +108,10 @@ struct x86_emulate_ops { | |||
108 | const void *old, | 108 | const void *old, |
109 | const void *new, | 109 | const void *new, |
110 | unsigned int bytes, | 110 | unsigned int bytes, |
111 | struct x86_emulate_ctxt * ctxt); | 111 | struct kvm_vcpu *vcpu); |
112 | 112 | ||
113 | }; | 113 | }; |
114 | 114 | ||
115 | struct cpu_user_regs; | ||
116 | |||
117 | struct x86_emulate_ctxt { | 115 | struct x86_emulate_ctxt { |
118 | /* Register state before/after emulation. */ | 116 | /* Register state before/after emulation. */ |
119 | struct kvm_vcpu *vcpu; | 117 | struct kvm_vcpu *vcpu; |
@@ -154,12 +152,4 @@ struct x86_emulate_ctxt { | |||
154 | int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, | 152 | int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, |
155 | struct x86_emulate_ops *ops); | 153 | struct x86_emulate_ops *ops); |
156 | 154 | ||
157 | /* | ||
158 | * Given the 'reg' portion of a ModRM byte, and a register block, return a | ||
159 | * pointer into the block that addresses the relevant register. | ||
160 | * @highbyte_regs specifies whether to decode AH,CH,DH,BH. | ||
161 | */ | ||
162 | void *decode_register(u8 modrm_reg, unsigned long *regs, | ||
163 | int highbyte_regs); | ||
164 | |||
165 | #endif /* __X86_EMULATE_H__ */ | 155 | #endif /* __X86_EMULATE_H__ */ |
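With the prototype change above, the emulator's memory callbacks receive the struct kvm_vcpu directly instead of digging it out of the emulation context. A minimal sketch of a read_std implementation under the new signature; read_guest_page() is an assumed helper for illustration, and the real in-tree implementation may differ.

	static int emulator_read_std(unsigned long addr, void *val,
				     unsigned int bytes, struct kvm_vcpu *vcpu)
	{
		while (bytes) {
			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
			unsigned offset = addr & (PAGE_SIZE - 1);
			unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);

			if (gpa == UNMAPPED_GVA)
				return X86EMUL_PROPAGATE_FAULT;
			/* read_guest_page() is a stand-in for the real copy helper */
			if (read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT,
					    val, offset, tocopy) < 0)
				return X86EMUL_UNHANDLEABLE;

			bytes -= tocopy;
			addr  += tocopy;
			val   += tocopy;
		}
		return X86EMUL_CONTINUE;
	}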
diff --git a/include/asm-x86/io_apic_32.h b/include/asm-x86/io_apic_32.h index dbe734ddf2af..3f087883ea48 100644 --- a/include/asm-x86/io_apic_32.h +++ b/include/asm-x86/io_apic_32.h | |||
@@ -11,8 +11,6 @@ | |||
11 | * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar | 11 | * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #ifdef CONFIG_X86_IO_APIC | ||
15 | |||
16 | /* | 14 | /* |
17 | * The structure of the IO-APIC: | 15 | * The structure of the IO-APIC: |
18 | */ | 16 | */ |
@@ -55,12 +53,6 @@ union IO_APIC_reg_03 { | |||
55 | } __attribute__ ((packed)) bits; | 53 | } __attribute__ ((packed)) bits; |
56 | }; | 54 | }; |
57 | 55 | ||
58 | /* | ||
59 | * # of IO-APICs and # of IRQ routing registers | ||
60 | */ | ||
61 | extern int nr_ioapics; | ||
62 | extern int nr_ioapic_registers[MAX_IO_APICS]; | ||
63 | |||
64 | enum ioapic_irq_destination_types { | 56 | enum ioapic_irq_destination_types { |
65 | dest_Fixed = 0, | 57 | dest_Fixed = 0, |
66 | dest_LowestPrio = 1, | 58 | dest_LowestPrio = 1, |
@@ -100,6 +92,14 @@ struct IO_APIC_route_entry { | |||
100 | 92 | ||
101 | } __attribute__ ((packed)); | 93 | } __attribute__ ((packed)); |
102 | 94 | ||
95 | #ifdef CONFIG_X86_IO_APIC | ||
96 | |||
97 | /* | ||
98 | * # of IO-APICs and # of IRQ routing registers | ||
99 | */ | ||
100 | extern int nr_ioapics; | ||
101 | extern int nr_ioapic_registers[MAX_IO_APICS]; | ||
102 | |||
103 | /* | 103 | /* |
104 | * MP-BIOS irq configuration table structures: | 104 | * MP-BIOS irq configuration table structures: |
105 | */ | 105 | */ |
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 5404e90edd57..199cab107d85 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h | |||
@@ -63,7 +63,7 @@ | |||
63 | /* | 63 | /* |
64 | * x86-64 Task Priority Register, CR8 | 64 | * x86-64 Task Priority Register, CR8 |
65 | */ | 65 | */ |
66 | #define X86_CR8_TPR 0x00000007 /* task priority register */ | 66 | #define X86_CR8_TPR 0x0000000F /* task priority register */ |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h> | 69 | * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h> |
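X86_CR8_TPR is widened from 0x7 to 0xF above because CR8 carries a 4-bit priority class, which the in-kernel APIC maps onto bits 7:4 of the local APIC TPR register. For illustration (hypothetical helpers, not from this patch):

	static inline unsigned long cr8_to_apic_tpr(unsigned long cr8)
	{
		return (cr8 & X86_CR8_TPR) << 4;	/* TPR[7:4] = CR8[3:0] */
	}

	static inline unsigned long apic_tpr_to_cr8(unsigned long tpr)
	{
		return (tpr >> 4) & X86_CR8_TPR;
	}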
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index e6edca81ab84..057a7f34ee36 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -4,8 +4,7 @@ | |||
4 | /* | 4 | /* |
5 | * Userspace interface for /dev/kvm - kernel based virtual machine | 5 | * Userspace interface for /dev/kvm - kernel based virtual machine |
6 | * | 6 | * |
7 | * Note: this interface is considered experimental and may change without | 7 | * Note: you must update KVM_API_VERSION if you change this interface. |
8 | * notice. | ||
9 | */ | 8 | */ |
10 | 9 | ||
11 | #include <asm/types.h> | 10 | #include <asm/types.h> |
@@ -13,14 +12,8 @@ | |||
13 | 12 | ||
14 | #define KVM_API_VERSION 12 | 13 | #define KVM_API_VERSION 12 |
15 | 14 | ||
16 | /* | 15 | /* Architectural interrupt line count. */ |
17 | * Architectural interrupt line count, and the size of the bitmap needed | ||
18 | * to hold them. | ||
19 | */ | ||
20 | #define KVM_NR_INTERRUPTS 256 | 16 | #define KVM_NR_INTERRUPTS 256 |
21 | #define KVM_IRQ_BITMAP_SIZE_BYTES ((KVM_NR_INTERRUPTS + 7) / 8) | ||
22 | #define KVM_IRQ_BITMAP_SIZE(type) (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type)) | ||
23 | |||
24 | 17 | ||
25 | /* for KVM_CREATE_MEMORY_REGION */ | 18 | /* for KVM_CREATE_MEMORY_REGION */ |
26 | struct kvm_memory_region { | 19 | struct kvm_memory_region { |
@@ -41,20 +34,89 @@ struct kvm_memory_alias { | |||
41 | __u64 target_phys_addr; | 34 | __u64 target_phys_addr; |
42 | }; | 35 | }; |
43 | 36 | ||
44 | enum kvm_exit_reason { | 37 | /* for KVM_IRQ_LINE */ |
45 | KVM_EXIT_UNKNOWN = 0, | 38 | struct kvm_irq_level { |
46 | KVM_EXIT_EXCEPTION = 1, | 39 | /* |
47 | KVM_EXIT_IO = 2, | 40 | * ACPI gsi notion of irq. |
48 | KVM_EXIT_HYPERCALL = 3, | 41 | * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. |
49 | KVM_EXIT_DEBUG = 4, | 42 | * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. |
50 | KVM_EXIT_HLT = 5, | 43 | */ |
51 | KVM_EXIT_MMIO = 6, | 44 | __u32 irq; |
52 | KVM_EXIT_IRQ_WINDOW_OPEN = 7, | 45 | __u32 level; |
53 | KVM_EXIT_SHUTDOWN = 8, | 46 | }; |
54 | KVM_EXIT_FAIL_ENTRY = 9, | 47 | |
55 | KVM_EXIT_INTR = 10, | 48 | /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ |
49 | struct kvm_pic_state { | ||
50 | __u8 last_irr; /* edge detection */ | ||
51 | __u8 irr; /* interrupt request register */ | ||
52 | __u8 imr; /* interrupt mask register */ | ||
53 | __u8 isr; /* interrupt service register */ | ||
54 | __u8 priority_add; /* highest irq priority */ | ||
55 | __u8 irq_base; | ||
56 | __u8 read_reg_select; | ||
57 | __u8 poll; | ||
58 | __u8 special_mask; | ||
59 | __u8 init_state; | ||
60 | __u8 auto_eoi; | ||
61 | __u8 rotate_on_auto_eoi; | ||
62 | __u8 special_fully_nested_mode; | ||
63 | __u8 init4; /* true if 4 byte init */ | ||
64 | __u8 elcr; /* PIIX edge/trigger selection */ | ||
65 | __u8 elcr_mask; | ||
66 | }; | ||
67 | |||
68 | #define KVM_IOAPIC_NUM_PINS 24 | ||
69 | struct kvm_ioapic_state { | ||
70 | __u64 base_address; | ||
71 | __u32 ioregsel; | ||
72 | __u32 id; | ||
73 | __u32 irr; | ||
74 | __u32 pad; | ||
75 | union { | ||
76 | __u64 bits; | ||
77 | struct { | ||
78 | __u8 vector; | ||
79 | __u8 delivery_mode:3; | ||
80 | __u8 dest_mode:1; | ||
81 | __u8 delivery_status:1; | ||
82 | __u8 polarity:1; | ||
83 | __u8 remote_irr:1; | ||
84 | __u8 trig_mode:1; | ||
85 | __u8 mask:1; | ||
86 | __u8 reserve:7; | ||
87 | __u8 reserved[4]; | ||
88 | __u8 dest_id; | ||
89 | } fields; | ||
90 | } redirtbl[KVM_IOAPIC_NUM_PINS]; | ||
56 | }; | 91 | }; |
57 | 92 | ||
93 | #define KVM_IRQCHIP_PIC_MASTER 0 | ||
94 | #define KVM_IRQCHIP_PIC_SLAVE 1 | ||
95 | #define KVM_IRQCHIP_IOAPIC 2 | ||
96 | |||
97 | struct kvm_irqchip { | ||
98 | __u32 chip_id; | ||
99 | __u32 pad; | ||
100 | union { | ||
101 | char dummy[512]; /* reserving space */ | ||
102 | struct kvm_pic_state pic; | ||
103 | struct kvm_ioapic_state ioapic; | ||
104 | } chip; | ||
105 | }; | ||
106 | |||
107 | #define KVM_EXIT_UNKNOWN 0 | ||
108 | #define KVM_EXIT_EXCEPTION 1 | ||
109 | #define KVM_EXIT_IO 2 | ||
110 | #define KVM_EXIT_HYPERCALL 3 | ||
111 | #define KVM_EXIT_DEBUG 4 | ||
112 | #define KVM_EXIT_HLT 5 | ||
113 | #define KVM_EXIT_MMIO 6 | ||
114 | #define KVM_EXIT_IRQ_WINDOW_OPEN 7 | ||
115 | #define KVM_EXIT_SHUTDOWN 8 | ||
116 | #define KVM_EXIT_FAIL_ENTRY 9 | ||
117 | #define KVM_EXIT_INTR 10 | ||
118 | #define KVM_EXIT_SET_TPR 11 | ||
119 | |||
58 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ | 120 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ |
59 | struct kvm_run { | 121 | struct kvm_run { |
60 | /* in */ | 122 | /* in */ |
@@ -106,11 +168,14 @@ struct kvm_run { | |||
106 | } mmio; | 168 | } mmio; |
107 | /* KVM_EXIT_HYPERCALL */ | 169 | /* KVM_EXIT_HYPERCALL */ |
108 | struct { | 170 | struct { |
171 | __u64 nr; | ||
109 | __u64 args[6]; | 172 | __u64 args[6]; |
110 | __u64 ret; | 173 | __u64 ret; |
111 | __u32 longmode; | 174 | __u32 longmode; |
112 | __u32 pad; | 175 | __u32 pad; |
113 | } hypercall; | 176 | } hypercall; |
177 | /* Fix the size of the union. */ | ||
178 | char padding[256]; | ||
114 | }; | 179 | }; |
115 | }; | 180 | }; |
116 | 181 | ||
@@ -139,6 +204,12 @@ struct kvm_fpu { | |||
139 | __u32 pad2; | 204 | __u32 pad2; |
140 | }; | 205 | }; |
141 | 206 | ||
207 | /* for KVM_GET_LAPIC and KVM_SET_LAPIC */ | ||
208 | #define KVM_APIC_REG_SIZE 0x400 | ||
209 | struct kvm_lapic_state { | ||
210 | char regs[KVM_APIC_REG_SIZE]; | ||
211 | }; | ||
212 | |||
142 | struct kvm_segment { | 213 | struct kvm_segment { |
143 | __u64 base; | 214 | __u64 base; |
144 | __u32 limit; | 215 | __u32 limit; |
@@ -164,7 +235,7 @@ struct kvm_sregs { | |||
164 | __u64 cr0, cr2, cr3, cr4, cr8; | 235 | __u64 cr0, cr2, cr3, cr4, cr8; |
165 | __u64 efer; | 236 | __u64 efer; |
166 | __u64 apic_base; | 237 | __u64 apic_base; |
167 | __u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)]; | 238 | __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; |
168 | }; | 239 | }; |
169 | 240 | ||
170 | struct kvm_msr_entry { | 241 | struct kvm_msr_entry { |
@@ -272,6 +343,12 @@ struct kvm_signal_mask { | |||
272 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ | 343 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ |
273 | 344 | ||
274 | /* | 345 | /* |
346 | * Extension capability list. | ||
347 | */ | ||
348 | #define KVM_CAP_IRQCHIP 0 | ||
349 | #define KVM_CAP_HLT 1 | ||
350 | |||
351 | /* | ||
275 | * ioctls for VM fds | 352 | * ioctls for VM fds |
276 | */ | 353 | */ |
277 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) | 354 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) |
@@ -282,6 +359,11 @@ struct kvm_signal_mask { | |||
282 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) | 359 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) |
283 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) | 360 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) |
284 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) | 361 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) |
362 | /* Device model IOC */ | ||
363 | #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) | ||
364 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) | ||
365 | #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) | ||
366 | #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) | ||
285 | 367 | ||
286 | /* | 368 | /* |
287 | * ioctls for vcpu fds | 369 | * ioctls for vcpu fds |
@@ -300,5 +382,7 @@ struct kvm_signal_mask { | |||
300 | #define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) | 382 | #define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) |
301 | #define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) | 383 | #define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) |
302 | #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) | 384 | #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) |
385 | #define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) | ||
386 | #define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) | ||
303 | 387 | ||
304 | #endif | 388 | #endif |
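The new device-model ioctls are driven from userspace roughly as follows. This is a minimal sketch that assumes a VM fd already obtained via KVM_CREATE_VM and a host that advertises KVM_CAP_IRQCHIP through KVM_CHECK_EXTENSION.

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int setup_irqchip(int vm_fd)
	{
		/* creates the in-kernel PIC pair and IOAPIC for this VM */
		return ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);
	}

	static int set_irq_line(int vm_fd, unsigned int gsi, int level)
	{
		struct kvm_irq_level irq = {
			.irq   = gsi,	/* PIC0/1: 0-15, IOAPIC0: 0-23 */
			.level = level,	/* 1 = assert, 0 = deassert */
		};

		return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
	}

	/*
	 * The in-kernel PIC/IOAPIC state can be saved and restored with
	 * KVM_GET_IRQCHIP/KVM_SET_IRQCHIP (struct kvm_irqchip), and the
	 * per-vcpu local APIC with KVM_GET_LAPIC/KVM_SET_LAPIC.
	 */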