diff options
Diffstat (limited to 'arch/x86/kvm')
| -rw-r--r-- | arch/x86/kvm/Makefile | 5 | ||||
| -rw-r--r-- | arch/x86/kvm/i8254.c | 81 | ||||
| -rw-r--r-- | arch/x86/kvm/i8254.h | 7 | ||||
| -rw-r--r-- | arch/x86/kvm/i8259.c | 53 | ||||
| -rw-r--r-- | arch/x86/kvm/irq.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/irq.h | 6 | ||||
| -rw-r--r-- | arch/x86/kvm/kvm_cache_regs.h | 32 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.c | 43 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu.c | 680 | ||||
| -rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 249 | ||||
| -rw-r--r-- | arch/x86/kvm/svm.c | 156 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx.c | 712 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx.h | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 552 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.h | 22 | ||||
| -rw-r--r-- | arch/x86/kvm/x86_emulate.c | 170 |
16 files changed, 1862 insertions, 912 deletions
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d0e940bb6f40..c02343594b4d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
| @@ -3,10 +3,13 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 6 | coalesced_mmio.o) | 6 | coalesced_mmio.o irq_comm.o) |
| 7 | ifeq ($(CONFIG_KVM_TRACE),y) | 7 | ifeq ($(CONFIG_KVM_TRACE),y) |
| 8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) | 8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) |
| 9 | endif | 9 | endif |
| 10 | ifeq ($(CONFIG_DMAR),y) | ||
| 11 | common-objs += $(addprefix ../../../virt/kvm/, vtd.o) | ||
| 12 | endif | ||
| 10 | 13 | ||
| 11 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
| 12 | 15 | ||
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c0f7872a9124..634132a9a512 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -200,13 +200,14 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) | |||
| 200 | 200 | ||
| 201 | if (!atomic_inc_and_test(&pt->pending)) | 201 | if (!atomic_inc_and_test(&pt->pending)) |
| 202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); | 202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); |
| 203 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) { | 203 | |
| 204 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 204 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) |
| 205 | wake_up_interruptible(&vcpu0->wq); | 205 | wake_up_interruptible(&vcpu0->wq); |
| 206 | } | ||
| 207 | 206 | ||
| 208 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); | 207 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); |
| 209 | pt->scheduled = ktime_to_ns(pt->timer.expires); | 208 | pt->scheduled = ktime_to_ns(pt->timer.expires); |
| 209 | if (pt->period) | ||
| 210 | ps->channels[0].count_load_time = pt->timer.expires; | ||
| 210 | 211 | ||
| 211 | return (pt->period == 0 ? 0 : 1); | 212 | return (pt->period == 0 ? 0 : 1); |
| 212 | } | 213 | } |
| @@ -215,12 +216,22 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 215 | { | 216 | { |
| 216 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 217 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
| 217 | 218 | ||
| 218 | if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending) | 219 | if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack) |
| 219 | return atomic_read(&pit->pit_state.pit_timer.pending); | 220 | return atomic_read(&pit->pit_state.pit_timer.pending); |
| 220 | |||
| 221 | return 0; | 221 | return 0; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
| 225 | { | ||
| 226 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | ||
| 227 | irq_ack_notifier); | ||
| 228 | spin_lock(&ps->inject_lock); | ||
| 229 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | ||
| 230 | atomic_inc(&ps->pit_timer.pending); | ||
| 231 | ps->irq_ack = 1; | ||
| 232 | spin_unlock(&ps->inject_lock); | ||
| 233 | } | ||
| 234 | |||
| 224 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | 235 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) |
| 225 | { | 236 | { |
| 226 | struct kvm_kpit_state *ps; | 237 | struct kvm_kpit_state *ps; |
| @@ -255,8 +266,9 @@ static void destroy_pit_timer(struct kvm_kpit_timer *pt) | |||
| 255 | hrtimer_cancel(&pt->timer); | 266 | hrtimer_cancel(&pt->timer); |
| 256 | } | 267 | } |
| 257 | 268 | ||
| 258 | static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) | 269 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
| 259 | { | 270 | { |
| 271 | struct kvm_kpit_timer *pt = &ps->pit_timer; | ||
| 260 | s64 interval; | 272 | s64 interval; |
| 261 | 273 | ||
| 262 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 274 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
| @@ -268,6 +280,7 @@ static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) | |||
| 268 | pt->period = (is_period == 0) ? 0 : interval; | 280 | pt->period = (is_period == 0) ? 0 : interval; |
| 269 | pt->timer.function = pit_timer_fn; | 281 | pt->timer.function = pit_timer_fn; |
| 270 | atomic_set(&pt->pending, 0); | 282 | atomic_set(&pt->pending, 0); |
| 283 | ps->irq_ack = 1; | ||
| 271 | 284 | ||
| 272 | hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), | 285 | hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), |
| 273 | HRTIMER_MODE_ABS); | 286 | HRTIMER_MODE_ABS); |
| @@ -302,11 +315,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
| 302 | case 1: | 315 | case 1: |
| 303 | /* FIXME: enhance mode 4 precision */ | 316 | /* FIXME: enhance mode 4 precision */ |
| 304 | case 4: | 317 | case 4: |
| 305 | create_pit_timer(&ps->pit_timer, val, 0); | 318 | create_pit_timer(ps, val, 0); |
| 306 | break; | 319 | break; |
| 307 | case 2: | 320 | case 2: |
| 308 | case 3: | 321 | case 3: |
| 309 | create_pit_timer(&ps->pit_timer, val, 1); | 322 | create_pit_timer(ps, val, 1); |
| 310 | break; | 323 | break; |
| 311 | default: | 324 | default: |
| 312 | destroy_pit_timer(&ps->pit_timer); | 325 | destroy_pit_timer(&ps->pit_timer); |
| @@ -520,7 +533,7 @@ void kvm_pit_reset(struct kvm_pit *pit) | |||
| 520 | mutex_unlock(&pit->pit_state.lock); | 533 | mutex_unlock(&pit->pit_state.lock); |
| 521 | 534 | ||
| 522 | atomic_set(&pit->pit_state.pit_timer.pending, 0); | 535 | atomic_set(&pit->pit_state.pit_timer.pending, 0); |
| 523 | pit->pit_state.inject_pending = 1; | 536 | pit->pit_state.irq_ack = 1; |
| 524 | } | 537 | } |
| 525 | 538 | ||
| 526 | struct kvm_pit *kvm_create_pit(struct kvm *kvm) | 539 | struct kvm_pit *kvm_create_pit(struct kvm *kvm) |
| @@ -534,6 +547,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) | |||
| 534 | 547 | ||
| 535 | mutex_init(&pit->pit_state.lock); | 548 | mutex_init(&pit->pit_state.lock); |
| 536 | mutex_lock(&pit->pit_state.lock); | 549 | mutex_lock(&pit->pit_state.lock); |
| 550 | spin_lock_init(&pit->pit_state.inject_lock); | ||
| 537 | 551 | ||
| 538 | /* Initialize PIO device */ | 552 | /* Initialize PIO device */ |
| 539 | pit->dev.read = pit_ioport_read; | 553 | pit->dev.read = pit_ioport_read; |
| @@ -555,6 +569,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) | |||
| 555 | pit_state->pit = pit; | 569 | pit_state->pit = pit; |
| 556 | hrtimer_init(&pit_state->pit_timer.timer, | 570 | hrtimer_init(&pit_state->pit_timer.timer, |
| 557 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 571 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
| 572 | pit_state->irq_ack_notifier.gsi = 0; | ||
| 573 | pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; | ||
| 574 | kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); | ||
| 558 | mutex_unlock(&pit->pit_state.lock); | 575 | mutex_unlock(&pit->pit_state.lock); |
| 559 | 576 | ||
| 560 | kvm_pit_reset(pit); | 577 | kvm_pit_reset(pit); |
| @@ -578,10 +595,8 @@ void kvm_free_pit(struct kvm *kvm) | |||
| 578 | static void __inject_pit_timer_intr(struct kvm *kvm) | 595 | static void __inject_pit_timer_intr(struct kvm *kvm) |
| 579 | { | 596 | { |
| 580 | mutex_lock(&kvm->lock); | 597 | mutex_lock(&kvm->lock); |
| 581 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); | 598 | kvm_set_irq(kvm, 0, 1); |
| 582 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0); | 599 | kvm_set_irq(kvm, 0, 0); |
| 583 | kvm_pic_set_irq(pic_irqchip(kvm), 0, 1); | ||
| 584 | kvm_pic_set_irq(pic_irqchip(kvm), 0, 0); | ||
| 585 | mutex_unlock(&kvm->lock); | 600 | mutex_unlock(&kvm->lock); |
| 586 | } | 601 | } |
| 587 | 602 | ||
| @@ -592,37 +607,19 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | |||
| 592 | struct kvm_kpit_state *ps; | 607 | struct kvm_kpit_state *ps; |
| 593 | 608 | ||
| 594 | if (vcpu && pit) { | 609 | if (vcpu && pit) { |
| 610 | int inject = 0; | ||
| 595 | ps = &pit->pit_state; | 611 | ps = &pit->pit_state; |
| 596 | 612 | ||
| 597 | /* Try to inject pending interrupts when: | 613 | /* Try to inject pending interrupts when |
| 598 | * 1. Pending exists | 614 | * last one has been acked. |
| 599 | * 2. Last interrupt was accepted or waited for too long time*/ | 615 | */ |
| 600 | if (atomic_read(&ps->pit_timer.pending) && | 616 | spin_lock(&ps->inject_lock); |
| 601 | (ps->inject_pending || | 617 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { |
| 602 | (jiffies - ps->last_injected_time | 618 | ps->irq_ack = 0; |
| 603 | >= KVM_MAX_PIT_INTR_INTERVAL))) { | 619 | inject = 1; |
| 604 | ps->inject_pending = 0; | ||
| 605 | __inject_pit_timer_intr(kvm); | ||
| 606 | ps->last_injected_time = jiffies; | ||
| 607 | } | ||
| 608 | } | ||
| 609 | } | ||
| 610 | |||
| 611 | void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | ||
| 612 | { | ||
| 613 | struct kvm_arch *arch = &vcpu->kvm->arch; | ||
| 614 | struct kvm_kpit_state *ps; | ||
| 615 | |||
| 616 | if (vcpu && arch->vpit) { | ||
| 617 | ps = &arch->vpit->pit_state; | ||
| 618 | if (atomic_read(&ps->pit_timer.pending) && | ||
| 619 | (((arch->vpic->pics[0].imr & 1) == 0 && | ||
| 620 | arch->vpic->pics[0].irq_base == vec) || | ||
| 621 | (arch->vioapic->redirtbl[0].fields.vector == vec && | ||
| 622 | arch->vioapic->redirtbl[0].fields.mask != 1))) { | ||
| 623 | ps->inject_pending = 1; | ||
| 624 | atomic_dec(&ps->pit_timer.pending); | ||
| 625 | ps->channels[0].count_load_time = ktime_get(); | ||
| 626 | } | 620 | } |
| 621 | spin_unlock(&ps->inject_lock); | ||
| 622 | if (inject) | ||
| 623 | __inject_pit_timer_intr(kvm); | ||
| 627 | } | 624 | } |
| 628 | } | 625 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index db25c2a6c8c4..e436d4983aa1 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
| @@ -8,7 +8,6 @@ struct kvm_kpit_timer { | |||
| 8 | int irq; | 8 | int irq; |
| 9 | s64 period; /* unit: ns */ | 9 | s64 period; /* unit: ns */ |
| 10 | s64 scheduled; | 10 | s64 scheduled; |
| 11 | ktime_t last_update; | ||
| 12 | atomic_t pending; | 11 | atomic_t pending; |
| 13 | }; | 12 | }; |
| 14 | 13 | ||
| @@ -34,8 +33,9 @@ struct kvm_kpit_state { | |||
| 34 | u32 speaker_data_on; | 33 | u32 speaker_data_on; |
| 35 | struct mutex lock; | 34 | struct mutex lock; |
| 36 | struct kvm_pit *pit; | 35 | struct kvm_pit *pit; |
| 37 | bool inject_pending; /* if inject pending interrupts */ | 36 | spinlock_t inject_lock; |
| 38 | unsigned long last_injected_time; | 37 | unsigned long irq_ack; |
| 38 | struct kvm_irq_ack_notifier irq_ack_notifier; | ||
| 39 | }; | 39 | }; |
| 40 | 40 | ||
| 41 | struct kvm_pit { | 41 | struct kvm_pit { |
| @@ -54,7 +54,6 @@ struct kvm_pit { | |||
| 54 | #define KVM_PIT_CHANNEL_MASK 0x3 | 54 | #define KVM_PIT_CHANNEL_MASK 0x3 |
| 55 | 55 | ||
| 56 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); | 56 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); |
| 57 | void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | ||
| 58 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); | 57 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); |
| 59 | struct kvm_pit *kvm_create_pit(struct kvm *kvm); | 58 | struct kvm_pit *kvm_create_pit(struct kvm *kvm); |
| 60 | void kvm_free_pit(struct kvm *kvm); | 59 | void kvm_free_pit(struct kvm *kvm); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index c31164e8aa46..17e41e165f1a 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -30,6 +30,19 @@ | |||
| 30 | 30 | ||
| 31 | #include <linux/kvm_host.h> | 31 | #include <linux/kvm_host.h> |
| 32 | 32 | ||
| 33 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | ||
| 34 | { | ||
| 35 | s->isr &= ~(1 << irq); | ||
| 36 | s->isr_ack |= (1 << irq); | ||
| 37 | } | ||
| 38 | |||
| 39 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | ||
| 40 | { | ||
| 41 | struct kvm_pic *s = pic_irqchip(kvm); | ||
| 42 | s->pics[0].isr_ack = 0xff; | ||
| 43 | s->pics[1].isr_ack = 0xff; | ||
| 44 | } | ||
| 45 | |||
| 33 | /* | 46 | /* |
| 34 | * set irq level. If an edge is detected, then the IRR is set to 1 | 47 | * set irq level. If an edge is detected, then the IRR is set to 1 |
| 35 | */ | 48 | */ |
| @@ -141,11 +154,12 @@ void kvm_pic_set_irq(void *opaque, int irq, int level) | |||
| 141 | */ | 154 | */ |
| 142 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | 155 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) |
| 143 | { | 156 | { |
| 157 | s->isr |= 1 << irq; | ||
| 144 | if (s->auto_eoi) { | 158 | if (s->auto_eoi) { |
| 145 | if (s->rotate_on_auto_eoi) | 159 | if (s->rotate_on_auto_eoi) |
| 146 | s->priority_add = (irq + 1) & 7; | 160 | s->priority_add = (irq + 1) & 7; |
| 147 | } else | 161 | pic_clear_isr(s, irq); |
| 148 | s->isr |= (1 << irq); | 162 | } |
| 149 | /* | 163 | /* |
| 150 | * We don't clear a level sensitive interrupt here | 164 | * We don't clear a level sensitive interrupt here |
| 151 | */ | 165 | */ |
| @@ -153,9 +167,10 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq) | |||
| 153 | s->irr &= ~(1 << irq); | 167 | s->irr &= ~(1 << irq); |
| 154 | } | 168 | } |
| 155 | 169 | ||
| 156 | int kvm_pic_read_irq(struct kvm_pic *s) | 170 | int kvm_pic_read_irq(struct kvm *kvm) |
| 157 | { | 171 | { |
| 158 | int irq, irq2, intno; | 172 | int irq, irq2, intno; |
| 173 | struct kvm_pic *s = pic_irqchip(kvm); | ||
| 159 | 174 | ||
| 160 | irq = pic_get_irq(&s->pics[0]); | 175 | irq = pic_get_irq(&s->pics[0]); |
| 161 | if (irq >= 0) { | 176 | if (irq >= 0) { |
| @@ -181,16 +196,32 @@ int kvm_pic_read_irq(struct kvm_pic *s) | |||
| 181 | intno = s->pics[0].irq_base + irq; | 196 | intno = s->pics[0].irq_base + irq; |
| 182 | } | 197 | } |
| 183 | pic_update_irq(s); | 198 | pic_update_irq(s); |
| 199 | kvm_notify_acked_irq(kvm, irq); | ||
| 184 | 200 | ||
| 185 | return intno; | 201 | return intno; |
| 186 | } | 202 | } |
| 187 | 203 | ||
| 188 | void kvm_pic_reset(struct kvm_kpic_state *s) | 204 | void kvm_pic_reset(struct kvm_kpic_state *s) |
| 189 | { | 205 | { |
| 206 | int irq, irqbase; | ||
| 207 | struct kvm *kvm = s->pics_state->irq_request_opaque; | ||
| 208 | struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; | ||
| 209 | |||
| 210 | if (s == &s->pics_state->pics[0]) | ||
| 211 | irqbase = 0; | ||
| 212 | else | ||
| 213 | irqbase = 8; | ||
| 214 | |||
| 215 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
| 216 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
| 217 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) | ||
| 218 | kvm_notify_acked_irq(kvm, irq+irqbase); | ||
| 219 | } | ||
| 190 | s->last_irr = 0; | 220 | s->last_irr = 0; |
| 191 | s->irr = 0; | 221 | s->irr = 0; |
| 192 | s->imr = 0; | 222 | s->imr = 0; |
| 193 | s->isr = 0; | 223 | s->isr = 0; |
| 224 | s->isr_ack = 0xff; | ||
| 194 | s->priority_add = 0; | 225 | s->priority_add = 0; |
| 195 | s->irq_base = 0; | 226 | s->irq_base = 0; |
| 196 | s->read_reg_select = 0; | 227 | s->read_reg_select = 0; |
| @@ -243,7 +274,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 243 | priority = get_priority(s, s->isr); | 274 | priority = get_priority(s, s->isr); |
| 244 | if (priority != 8) { | 275 | if (priority != 8) { |
| 245 | irq = (priority + s->priority_add) & 7; | 276 | irq = (priority + s->priority_add) & 7; |
| 246 | s->isr &= ~(1 << irq); | 277 | pic_clear_isr(s, irq); |
| 247 | if (cmd == 5) | 278 | if (cmd == 5) |
| 248 | s->priority_add = (irq + 1) & 7; | 279 | s->priority_add = (irq + 1) & 7; |
| 249 | pic_update_irq(s->pics_state); | 280 | pic_update_irq(s->pics_state); |
| @@ -251,7 +282,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 251 | break; | 282 | break; |
| 252 | case 3: | 283 | case 3: |
| 253 | irq = val & 7; | 284 | irq = val & 7; |
| 254 | s->isr &= ~(1 << irq); | 285 | pic_clear_isr(s, irq); |
| 255 | pic_update_irq(s->pics_state); | 286 | pic_update_irq(s->pics_state); |
| 256 | break; | 287 | break; |
| 257 | case 6: | 288 | case 6: |
| @@ -260,8 +291,8 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 260 | break; | 291 | break; |
| 261 | case 7: | 292 | case 7: |
| 262 | irq = val & 7; | 293 | irq = val & 7; |
| 263 | s->isr &= ~(1 << irq); | ||
| 264 | s->priority_add = (irq + 1) & 7; | 294 | s->priority_add = (irq + 1) & 7; |
| 295 | pic_clear_isr(s, irq); | ||
| 265 | pic_update_irq(s->pics_state); | 296 | pic_update_irq(s->pics_state); |
| 266 | break; | 297 | break; |
| 267 | default: | 298 | default: |
| @@ -303,7 +334,7 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) | |||
| 303 | s->pics_state->pics[0].irr &= ~(1 << 2); | 334 | s->pics_state->pics[0].irr &= ~(1 << 2); |
| 304 | } | 335 | } |
| 305 | s->irr &= ~(1 << ret); | 336 | s->irr &= ~(1 << ret); |
| 306 | s->isr &= ~(1 << ret); | 337 | pic_clear_isr(s, ret); |
| 307 | if (addr1 >> 7 || ret != 2) | 338 | if (addr1 >> 7 || ret != 2) |
| 308 | pic_update_irq(s->pics_state); | 339 | pic_update_irq(s->pics_state); |
| 309 | } else { | 340 | } else { |
| @@ -422,10 +453,14 @@ static void pic_irq_request(void *opaque, int level) | |||
| 422 | { | 453 | { |
| 423 | struct kvm *kvm = opaque; | 454 | struct kvm *kvm = opaque; |
| 424 | struct kvm_vcpu *vcpu = kvm->vcpus[0]; | 455 | struct kvm_vcpu *vcpu = kvm->vcpus[0]; |
| 456 | struct kvm_pic *s = pic_irqchip(kvm); | ||
| 457 | int irq = pic_get_irq(&s->pics[0]); | ||
| 425 | 458 | ||
| 426 | pic_irqchip(kvm)->output = level; | 459 | s->output = level; |
| 427 | if (vcpu) | 460 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
| 461 | s->pics[0].isr_ack &= ~(1 << irq); | ||
| 428 | kvm_vcpu_kick(vcpu); | 462 | kvm_vcpu_kick(vcpu); |
| 463 | } | ||
| 429 | } | 464 | } |
| 430 | 465 | ||
| 431 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) | 466 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 76d736b5f664..c019b8edcdb7 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
| @@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | |||
| 72 | if (kvm_apic_accept_pic_intr(v)) { | 72 | if (kvm_apic_accept_pic_intr(v)) { |
| 73 | s = pic_irqchip(v->kvm); | 73 | s = pic_irqchip(v->kvm); |
| 74 | s->output = 0; /* PIC */ | 74 | s->output = 0; /* PIC */ |
| 75 | vector = kvm_pic_read_irq(s); | 75 | vector = kvm_pic_read_irq(v->kvm); |
| 76 | } | 76 | } |
| 77 | } | 77 | } |
| 78 | return vector; | 78 | return vector; |
| @@ -90,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | |||
| 90 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | 90 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) |
| 91 | { | 91 | { |
| 92 | kvm_apic_timer_intr_post(vcpu, vec); | 92 | kvm_apic_timer_intr_post(vcpu, vec); |
| 93 | kvm_pit_timer_intr_post(vcpu, vec); | ||
| 94 | /* TODO: PIT, RTC etc. */ | 93 | /* TODO: PIT, RTC etc. */ |
| 95 | } | 94 | } |
| 96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); | 95 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7ca47cbb48bb..f17c8f5bbf31 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -42,6 +42,7 @@ struct kvm_kpic_state { | |||
| 42 | u8 irr; /* interrupt request register */ | 42 | u8 irr; /* interrupt request register */ |
| 43 | u8 imr; /* interrupt mask register */ | 43 | u8 imr; /* interrupt mask register */ |
| 44 | u8 isr; /* interrupt service register */ | 44 | u8 isr; /* interrupt service register */ |
| 45 | u8 isr_ack; /* interrupt ack detection */ | ||
| 45 | u8 priority_add; /* highest irq priority */ | 46 | u8 priority_add; /* highest irq priority */ |
| 46 | u8 irq_base; | 47 | u8 irq_base; |
| 47 | u8 read_reg_select; | 48 | u8 read_reg_select; |
| @@ -63,12 +64,13 @@ struct kvm_pic { | |||
| 63 | void *irq_request_opaque; | 64 | void *irq_request_opaque; |
| 64 | int output; /* intr from master PIC */ | 65 | int output; /* intr from master PIC */ |
| 65 | struct kvm_io_device dev; | 66 | struct kvm_io_device dev; |
| 67 | void (*ack_notifier)(void *opaque, int irq); | ||
| 66 | }; | 68 | }; |
| 67 | 69 | ||
| 68 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 70 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
| 69 | void kvm_pic_set_irq(void *opaque, int irq, int level); | 71 | int kvm_pic_read_irq(struct kvm *kvm); |
| 70 | int kvm_pic_read_irq(struct kvm_pic *s); | ||
| 71 | void kvm_pic_update_irq(struct kvm_pic *s); | 72 | void kvm_pic_update_irq(struct kvm_pic *s); |
| 73 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | ||
| 72 | 74 | ||
| 73 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | 75 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) |
| 74 | { | 76 | { |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h new file mode 100644 index 000000000000..1ff819dce7d3 --- /dev/null +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | #ifndef ASM_KVM_CACHE_REGS_H | ||
| 2 | #define ASM_KVM_CACHE_REGS_H | ||
| 3 | |||
| 4 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | ||
| 5 | enum kvm_reg reg) | ||
| 6 | { | ||
| 7 | if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail)) | ||
| 8 | kvm_x86_ops->cache_reg(vcpu, reg); | ||
| 9 | |||
| 10 | return vcpu->arch.regs[reg]; | ||
| 11 | } | ||
| 12 | |||
| 13 | static inline void kvm_register_write(struct kvm_vcpu *vcpu, | ||
| 14 | enum kvm_reg reg, | ||
| 15 | unsigned long val) | ||
| 16 | { | ||
| 17 | vcpu->arch.regs[reg] = val; | ||
| 18 | __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); | ||
| 19 | __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||
| 20 | } | ||
| 21 | |||
| 22 | static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) | ||
| 23 | { | ||
| 24 | return kvm_register_read(vcpu, VCPU_REGS_RIP); | ||
| 25 | } | ||
| 26 | |||
| 27 | static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) | ||
| 28 | { | ||
| 29 | kvm_register_write(vcpu, VCPU_REGS_RIP, val); | ||
| 30 | } | ||
| 31 | |||
| 32 | #endif | ||
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 73f43de69f67..6571926bfd33 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <asm/current.h> | 32 | #include <asm/current.h> |
| 33 | #include <asm/apicdef.h> | 33 | #include <asm/apicdef.h> |
| 34 | #include <asm/atomic.h> | 34 | #include <asm/atomic.h> |
| 35 | #include "kvm_cache_regs.h" | ||
| 35 | #include "irq.h" | 36 | #include "irq.h" |
| 36 | 37 | ||
| 37 | #define PRId64 "d" | 38 | #define PRId64 "d" |
| @@ -338,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 338 | } else | 339 | } else |
| 339 | apic_clear_vector(vector, apic->regs + APIC_TMR); | 340 | apic_clear_vector(vector, apic->regs + APIC_TMR); |
| 340 | 341 | ||
| 341 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 342 | kvm_vcpu_kick(vcpu); |
| 342 | kvm_vcpu_kick(vcpu); | ||
| 343 | else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { | ||
| 344 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 345 | if (waitqueue_active(&vcpu->wq)) | ||
| 346 | wake_up_interruptible(&vcpu->wq); | ||
| 347 | } | ||
| 348 | 343 | ||
| 349 | result = (orig_irr == 0); | 344 | result = (orig_irr == 0); |
| 350 | break; | 345 | break; |
| @@ -370,21 +365,18 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 370 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 365 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
| 371 | kvm_vcpu_kick(vcpu); | 366 | kvm_vcpu_kick(vcpu); |
| 372 | } else { | 367 | } else { |
| 373 | printk(KERN_DEBUG | 368 | apic_debug("Ignoring de-assert INIT to vcpu %d\n", |
| 374 | "Ignoring de-assert INIT to vcpu %d\n", | 369 | vcpu->vcpu_id); |
| 375 | vcpu->vcpu_id); | ||
| 376 | } | 370 | } |
| 377 | |||
| 378 | break; | 371 | break; |
| 379 | 372 | ||
| 380 | case APIC_DM_STARTUP: | 373 | case APIC_DM_STARTUP: |
| 381 | printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", | 374 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", |
| 382 | vcpu->vcpu_id, vector); | 375 | vcpu->vcpu_id, vector); |
| 383 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 376 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
| 384 | vcpu->arch.sipi_vector = vector; | 377 | vcpu->arch.sipi_vector = vector; |
| 385 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; | 378 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; |
| 386 | if (waitqueue_active(&vcpu->wq)) | 379 | kvm_vcpu_kick(vcpu); |
| 387 | wake_up_interruptible(&vcpu->wq); | ||
| 388 | } | 380 | } |
| 389 | break; | 381 | break; |
| 390 | 382 | ||
| @@ -438,7 +430,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | |||
| 438 | static void apic_set_eoi(struct kvm_lapic *apic) | 430 | static void apic_set_eoi(struct kvm_lapic *apic) |
| 439 | { | 431 | { |
| 440 | int vector = apic_find_highest_isr(apic); | 432 | int vector = apic_find_highest_isr(apic); |
| 441 | 433 | int trigger_mode; | |
| 442 | /* | 434 | /* |
| 443 | * Not every write EOI will has corresponding ISR, | 435 | * Not every write EOI will has corresponding ISR, |
| 444 | * one example is when Kernel check timer on setup_IO_APIC | 436 | * one example is when Kernel check timer on setup_IO_APIC |
| @@ -450,7 +442,10 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
| 450 | apic_update_ppr(apic); | 442 | apic_update_ppr(apic); |
| 451 | 443 | ||
| 452 | if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) | 444 | if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) |
| 453 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector); | 445 | trigger_mode = IOAPIC_LEVEL_TRIG; |
| 446 | else | ||
| 447 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
| 448 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
| 454 | } | 449 | } |
| 455 | 450 | ||
| 456 | static void apic_send_ipi(struct kvm_lapic *apic) | 451 | static void apic_send_ipi(struct kvm_lapic *apic) |
| @@ -558,8 +553,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write) | |||
| 558 | struct kvm_run *run = vcpu->run; | 553 | struct kvm_run *run = vcpu->run; |
| 559 | 554 | ||
| 560 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); | 555 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); |
| 561 | kvm_x86_ops->cache_regs(vcpu); | 556 | run->tpr_access.rip = kvm_rip_read(vcpu); |
| 562 | run->tpr_access.rip = vcpu->arch.rip; | ||
| 563 | run->tpr_access.is_write = write; | 557 | run->tpr_access.is_write = write; |
| 564 | } | 558 | } |
| 565 | 559 | ||
| @@ -683,9 +677,9 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
| 683 | * Refer SDM 8.4.1 | 677 | * Refer SDM 8.4.1 |
| 684 | */ | 678 | */ |
| 685 | if (len != 4 || alignment) { | 679 | if (len != 4 || alignment) { |
| 686 | if (printk_ratelimit()) | 680 | /* Don't shout loud, $infamous_os would cause only noise. */ |
| 687 | printk(KERN_ERR "apic write: bad size=%d %lx\n", | 681 | apic_debug("apic write: bad size=%d %lx\n", |
| 688 | len, (long)address); | 682 | len, (long)address); |
| 689 | return; | 683 | return; |
| 690 | } | 684 | } |
| 691 | 685 | ||
| @@ -947,10 +941,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
| 947 | 941 | ||
| 948 | if(!atomic_inc_and_test(&apic->timer.pending)) | 942 | if(!atomic_inc_and_test(&apic->timer.pending)) |
| 949 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | 943 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); |
| 950 | if (waitqueue_active(q)) { | 944 | if (waitqueue_active(q)) |
| 951 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 952 | wake_up_interruptible(q); | 945 | wake_up_interruptible(q); |
| 953 | } | 946 | |
| 954 | if (apic_lvtt_period(apic)) { | 947 | if (apic_lvtt_period(apic)) { |
| 955 | result = 1; | 948 | result = 1; |
| 956 | apic->timer.dev.expires = ktime_add_ns( | 949 | apic->timer.dev.expires = ktime_add_ns( |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3da2508eb22a..99c239c5c0ac 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -70,6 +70,9 @@ static int dbg = 0; | |||
| 70 | module_param(dbg, bool, 0644); | 70 | module_param(dbg, bool, 0644); |
| 71 | #endif | 71 | #endif |
| 72 | 72 | ||
| 73 | static int oos_shadow = 1; | ||
| 74 | module_param(oos_shadow, bool, 0644); | ||
| 75 | |||
| 73 | #ifndef MMU_DEBUG | 76 | #ifndef MMU_DEBUG |
| 74 | #define ASSERT(x) do { } while (0) | 77 | #define ASSERT(x) do { } while (0) |
| 75 | #else | 78 | #else |
| @@ -135,18 +138,24 @@ module_param(dbg, bool, 0644); | |||
| 135 | #define ACC_USER_MASK PT_USER_MASK | 138 | #define ACC_USER_MASK PT_USER_MASK |
| 136 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 139 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
| 137 | 140 | ||
| 138 | struct kvm_pv_mmu_op_buffer { | 141 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
| 139 | void *ptr; | ||
| 140 | unsigned len; | ||
| 141 | unsigned processed; | ||
| 142 | char buf[512] __aligned(sizeof(long)); | ||
| 143 | }; | ||
| 144 | 142 | ||
| 145 | struct kvm_rmap_desc { | 143 | struct kvm_rmap_desc { |
| 146 | u64 *shadow_ptes[RMAP_EXT]; | 144 | u64 *shadow_ptes[RMAP_EXT]; |
| 147 | struct kvm_rmap_desc *more; | 145 | struct kvm_rmap_desc *more; |
| 148 | }; | 146 | }; |
| 149 | 147 | ||
| 148 | struct kvm_shadow_walk { | ||
| 149 | int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, | ||
| 150 | u64 addr, u64 *spte, int level); | ||
| 151 | }; | ||
| 152 | |||
| 153 | struct kvm_unsync_walk { | ||
| 154 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
| 155 | }; | ||
| 156 | |||
| 157 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
| 158 | |||
| 150 | static struct kmem_cache *pte_chain_cache; | 159 | static struct kmem_cache *pte_chain_cache; |
| 151 | static struct kmem_cache *rmap_desc_cache; | 160 | static struct kmem_cache *rmap_desc_cache; |
| 152 | static struct kmem_cache *mmu_page_header_cache; | 161 | static struct kmem_cache *mmu_page_header_cache; |
| @@ -405,16 +414,19 @@ static int host_largepage_backed(struct kvm *kvm, gfn_t gfn) | |||
| 405 | { | 414 | { |
| 406 | struct vm_area_struct *vma; | 415 | struct vm_area_struct *vma; |
| 407 | unsigned long addr; | 416 | unsigned long addr; |
| 417 | int ret = 0; | ||
| 408 | 418 | ||
| 409 | addr = gfn_to_hva(kvm, gfn); | 419 | addr = gfn_to_hva(kvm, gfn); |
| 410 | if (kvm_is_error_hva(addr)) | 420 | if (kvm_is_error_hva(addr)) |
| 411 | return 0; | 421 | return ret; |
| 412 | 422 | ||
| 423 | down_read(&current->mm->mmap_sem); | ||
| 413 | vma = find_vma(current->mm, addr); | 424 | vma = find_vma(current->mm, addr); |
| 414 | if (vma && is_vm_hugetlb_page(vma)) | 425 | if (vma && is_vm_hugetlb_page(vma)) |
| 415 | return 1; | 426 | ret = 1; |
| 427 | up_read(&current->mm->mmap_sem); | ||
| 416 | 428 | ||
| 417 | return 0; | 429 | return ret; |
| 418 | } | 430 | } |
| 419 | 431 | ||
| 420 | static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 432 | static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
| @@ -649,8 +661,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 649 | 661 | ||
| 650 | if (write_protected) | 662 | if (write_protected) |
| 651 | kvm_flush_remote_tlbs(kvm); | 663 | kvm_flush_remote_tlbs(kvm); |
| 652 | |||
| 653 | account_shadowed(kvm, gfn); | ||
| 654 | } | 664 | } |
| 655 | 665 | ||
| 656 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 666 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) |
| @@ -859,6 +869,77 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
| 859 | BUG(); | 869 | BUG(); |
| 860 | } | 870 | } |
| 861 | 871 | ||
| 872 | |||
| 873 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
| 874 | mmu_parent_walk_fn fn) | ||
| 875 | { | ||
| 876 | struct kvm_pte_chain *pte_chain; | ||
| 877 | struct hlist_node *node; | ||
| 878 | struct kvm_mmu_page *parent_sp; | ||
| 879 | int i; | ||
| 880 | |||
| 881 | if (!sp->multimapped && sp->parent_pte) { | ||
| 882 | parent_sp = page_header(__pa(sp->parent_pte)); | ||
| 883 | fn(vcpu, parent_sp); | ||
| 884 | mmu_parent_walk(vcpu, parent_sp, fn); | ||
| 885 | return; | ||
| 886 | } | ||
| 887 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
| 888 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
| 889 | if (!pte_chain->parent_ptes[i]) | ||
| 890 | break; | ||
| 891 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | ||
| 892 | fn(vcpu, parent_sp); | ||
| 893 | mmu_parent_walk(vcpu, parent_sp, fn); | ||
| 894 | } | ||
| 895 | } | ||
| 896 | |||
| 897 | static void kvm_mmu_update_unsync_bitmap(u64 *spte) | ||
| 898 | { | ||
| 899 | unsigned int index; | ||
| 900 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
| 901 | |||
| 902 | index = spte - sp->spt; | ||
| 903 | __set_bit(index, sp->unsync_child_bitmap); | ||
| 904 | sp->unsync_children = 1; | ||
| 905 | } | ||
| 906 | |||
| 907 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | ||
| 908 | { | ||
| 909 | struct kvm_pte_chain *pte_chain; | ||
| 910 | struct hlist_node *node; | ||
| 911 | int i; | ||
| 912 | |||
| 913 | if (!sp->parent_pte) | ||
| 914 | return; | ||
| 915 | |||
| 916 | if (!sp->multimapped) { | ||
| 917 | kvm_mmu_update_unsync_bitmap(sp->parent_pte); | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | |||
| 921 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
| 922 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
| 923 | if (!pte_chain->parent_ptes[i]) | ||
| 924 | break; | ||
| 925 | kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); | ||
| 926 | } | ||
| 927 | } | ||
| 928 | |||
| 929 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 930 | { | ||
| 931 | sp->unsync_children = 1; | ||
| 932 | kvm_mmu_update_parents_unsync(sp); | ||
| 933 | return 1; | ||
| 934 | } | ||
| 935 | |||
| 936 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | ||
| 937 | struct kvm_mmu_page *sp) | ||
| 938 | { | ||
| 939 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | ||
| 940 | kvm_mmu_update_parents_unsync(sp); | ||
| 941 | } | ||
| 942 | |||
| 862 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 943 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
| 863 | struct kvm_mmu_page *sp) | 944 | struct kvm_mmu_page *sp) |
| 864 | { | 945 | { |
| @@ -868,6 +949,58 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
| 868 | sp->spt[i] = shadow_trap_nonpresent_pte; | 949 | sp->spt[i] = shadow_trap_nonpresent_pte; |
| 869 | } | 950 | } |
| 870 | 951 | ||
| 952 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | ||
| 953 | struct kvm_mmu_page *sp) | ||
| 954 | { | ||
| 955 | return 1; | ||
| 956 | } | ||
| 957 | |||
| 958 | static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 959 | { | ||
| 960 | } | ||
| 961 | |||
| 962 | #define for_each_unsync_children(bitmap, idx) \ | ||
| 963 | for (idx = find_first_bit(bitmap, 512); \ | ||
| 964 | idx < 512; \ | ||
| 965 | idx = find_next_bit(bitmap, 512, idx+1)) | ||
| 966 | |||
| 967 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | ||
| 968 | struct kvm_unsync_walk *walker) | ||
| 969 | { | ||
| 970 | int i, ret; | ||
| 971 | |||
| 972 | if (!sp->unsync_children) | ||
| 973 | return 0; | ||
| 974 | |||
| 975 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | ||
| 976 | u64 ent = sp->spt[i]; | ||
| 977 | |||
| 978 | if (is_shadow_present_pte(ent)) { | ||
| 979 | struct kvm_mmu_page *child; | ||
| 980 | child = page_header(ent & PT64_BASE_ADDR_MASK); | ||
| 981 | |||
| 982 | if (child->unsync_children) { | ||
| 983 | ret = mmu_unsync_walk(child, walker); | ||
| 984 | if (ret) | ||
| 985 | return ret; | ||
| 986 | __clear_bit(i, sp->unsync_child_bitmap); | ||
| 987 | } | ||
| 988 | |||
| 989 | if (child->unsync) { | ||
| 990 | ret = walker->entry(child, walker); | ||
| 991 | __clear_bit(i, sp->unsync_child_bitmap); | ||
| 992 | if (ret) | ||
| 993 | return ret; | ||
| 994 | } | ||
| 995 | } | ||
| 996 | } | ||
| 997 | |||
| 998 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | ||
| 999 | sp->unsync_children = 0; | ||
| 1000 | |||
| 1001 | return 0; | ||
| 1002 | } | ||
| 1003 | |||
| 871 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | 1004 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) |
| 872 | { | 1005 | { |
| 873 | unsigned index; | 1006 | unsigned index; |
| @@ -888,6 +1021,59 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
| 888 | return NULL; | 1021 | return NULL; |
| 889 | } | 1022 | } |
| 890 | 1023 | ||
| 1024 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1025 | { | ||
| 1026 | WARN_ON(!sp->unsync); | ||
| 1027 | sp->unsync = 0; | ||
| 1028 | --kvm->stat.mmu_unsync; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | ||
| 1032 | |||
| 1033 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 1034 | { | ||
| 1035 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | ||
| 1036 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
| 1037 | return 1; | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | rmap_write_protect(vcpu->kvm, sp->gfn); | ||
| 1041 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | ||
| 1042 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
| 1043 | return 1; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | kvm_mmu_flush_tlb(vcpu); | ||
| 1047 | kvm_unlink_unsync_page(vcpu->kvm, sp); | ||
| 1048 | return 0; | ||
| 1049 | } | ||
| 1050 | |||
| 1051 | struct sync_walker { | ||
| 1052 | struct kvm_vcpu *vcpu; | ||
| 1053 | struct kvm_unsync_walk walker; | ||
| 1054 | }; | ||
| 1055 | |||
| 1056 | static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | ||
| 1057 | { | ||
| 1058 | struct sync_walker *sync_walk = container_of(walk, struct sync_walker, | ||
| 1059 | walker); | ||
| 1060 | struct kvm_vcpu *vcpu = sync_walk->vcpu; | ||
| 1061 | |||
| 1062 | kvm_sync_page(vcpu, sp); | ||
| 1063 | return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 1067 | { | ||
| 1068 | struct sync_walker walker = { | ||
| 1069 | .walker = { .entry = mmu_sync_fn, }, | ||
| 1070 | .vcpu = vcpu, | ||
| 1071 | }; | ||
| 1072 | |||
| 1073 | while (mmu_unsync_walk(sp, &walker.walker)) | ||
| 1074 | cond_resched_lock(&vcpu->kvm->mmu_lock); | ||
| 1075 | } | ||
| 1076 | |||
| 891 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1077 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
| 892 | gfn_t gfn, | 1078 | gfn_t gfn, |
| 893 | gva_t gaddr, | 1079 | gva_t gaddr, |
| @@ -901,7 +1087,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 901 | unsigned quadrant; | 1087 | unsigned quadrant; |
| 902 | struct hlist_head *bucket; | 1088 | struct hlist_head *bucket; |
| 903 | struct kvm_mmu_page *sp; | 1089 | struct kvm_mmu_page *sp; |
| 904 | struct hlist_node *node; | 1090 | struct hlist_node *node, *tmp; |
| 905 | 1091 | ||
| 906 | role.word = 0; | 1092 | role.word = 0; |
| 907 | role.glevels = vcpu->arch.mmu.root_level; | 1093 | role.glevels = vcpu->arch.mmu.root_level; |
| @@ -917,9 +1103,20 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 917 | gfn, role.word); | 1103 | gfn, role.word); |
| 918 | index = kvm_page_table_hashfn(gfn); | 1104 | index = kvm_page_table_hashfn(gfn); |
| 919 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1105 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 920 | hlist_for_each_entry(sp, node, bucket, hash_link) | 1106 | hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) |
| 921 | if (sp->gfn == gfn && sp->role.word == role.word) { | 1107 | if (sp->gfn == gfn) { |
| 1108 | if (sp->unsync) | ||
| 1109 | if (kvm_sync_page(vcpu, sp)) | ||
| 1110 | continue; | ||
| 1111 | |||
| 1112 | if (sp->role.word != role.word) | ||
| 1113 | continue; | ||
| 1114 | |||
| 922 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1115 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1116 | if (sp->unsync_children) { | ||
| 1117 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | ||
| 1118 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
| 1119 | } | ||
| 923 | pgprintk("%s: found\n", __func__); | 1120 | pgprintk("%s: found\n", __func__); |
| 924 | return sp; | 1121 | return sp; |
| 925 | } | 1122 | } |
| @@ -931,8 +1128,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 931 | sp->gfn = gfn; | 1128 | sp->gfn = gfn; |
| 932 | sp->role = role; | 1129 | sp->role = role; |
| 933 | hlist_add_head(&sp->hash_link, bucket); | 1130 | hlist_add_head(&sp->hash_link, bucket); |
| 934 | if (!metaphysical) | 1131 | if (!metaphysical) { |
| 935 | rmap_write_protect(vcpu->kvm, gfn); | 1132 | rmap_write_protect(vcpu->kvm, gfn); |
| 1133 | account_shadowed(vcpu->kvm, gfn); | ||
| 1134 | } | ||
| 936 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1135 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
| 937 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | 1136 | vcpu->arch.mmu.prefetch_page(vcpu, sp); |
| 938 | else | 1137 | else |
| @@ -940,6 +1139,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 940 | return sp; | 1139 | return sp; |
| 941 | } | 1140 | } |
| 942 | 1141 | ||
| 1142 | static int walk_shadow(struct kvm_shadow_walk *walker, | ||
| 1143 | struct kvm_vcpu *vcpu, u64 addr) | ||
| 1144 | { | ||
| 1145 | hpa_t shadow_addr; | ||
| 1146 | int level; | ||
| 1147 | int r; | ||
| 1148 | u64 *sptep; | ||
| 1149 | unsigned index; | ||
| 1150 | |||
| 1151 | shadow_addr = vcpu->arch.mmu.root_hpa; | ||
| 1152 | level = vcpu->arch.mmu.shadow_root_level; | ||
| 1153 | if (level == PT32E_ROOT_LEVEL) { | ||
| 1154 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | ||
| 1155 | shadow_addr &= PT64_BASE_ADDR_MASK; | ||
| 1156 | --level; | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | while (level >= PT_PAGE_TABLE_LEVEL) { | ||
| 1160 | index = SHADOW_PT_INDEX(addr, level); | ||
| 1161 | sptep = ((u64 *)__va(shadow_addr)) + index; | ||
| 1162 | r = walker->entry(walker, vcpu, addr, sptep, level); | ||
| 1163 | if (r) | ||
| 1164 | return r; | ||
| 1165 | shadow_addr = *sptep & PT64_BASE_ADDR_MASK; | ||
| 1166 | --level; | ||
| 1167 | } | ||
| 1168 | return 0; | ||
| 1169 | } | ||
| 1170 | |||
| 943 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1171 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
| 944 | struct kvm_mmu_page *sp) | 1172 | struct kvm_mmu_page *sp) |
| 945 | { | 1173 | { |
| @@ -955,7 +1183,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
| 955 | rmap_remove(kvm, &pt[i]); | 1183 | rmap_remove(kvm, &pt[i]); |
| 956 | pt[i] = shadow_trap_nonpresent_pte; | 1184 | pt[i] = shadow_trap_nonpresent_pte; |
| 957 | } | 1185 | } |
| 958 | kvm_flush_remote_tlbs(kvm); | ||
| 959 | return; | 1186 | return; |
| 960 | } | 1187 | } |
| 961 | 1188 | ||
| @@ -974,7 +1201,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
| 974 | } | 1201 | } |
| 975 | pt[i] = shadow_trap_nonpresent_pte; | 1202 | pt[i] = shadow_trap_nonpresent_pte; |
| 976 | } | 1203 | } |
| 977 | kvm_flush_remote_tlbs(kvm); | ||
| 978 | } | 1204 | } |
| 979 | 1205 | ||
| 980 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | 1206 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) |
| @@ -991,11 +1217,10 @@ static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) | |||
| 991 | kvm->vcpus[i]->arch.last_pte_updated = NULL; | 1217 | kvm->vcpus[i]->arch.last_pte_updated = NULL; |
| 992 | } | 1218 | } |
| 993 | 1219 | ||
| 994 | static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1220 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 995 | { | 1221 | { |
| 996 | u64 *parent_pte; | 1222 | u64 *parent_pte; |
| 997 | 1223 | ||
| 998 | ++kvm->stat.mmu_shadow_zapped; | ||
| 999 | while (sp->multimapped || sp->parent_pte) { | 1224 | while (sp->multimapped || sp->parent_pte) { |
| 1000 | if (!sp->multimapped) | 1225 | if (!sp->multimapped) |
| 1001 | parent_pte = sp->parent_pte; | 1226 | parent_pte = sp->parent_pte; |
| @@ -1010,21 +1235,59 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
| 1010 | kvm_mmu_put_page(sp, parent_pte); | 1235 | kvm_mmu_put_page(sp, parent_pte); |
| 1011 | set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte); | 1236 | set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte); |
| 1012 | } | 1237 | } |
| 1238 | } | ||
| 1239 | |||
| 1240 | struct zap_walker { | ||
| 1241 | struct kvm_unsync_walk walker; | ||
| 1242 | struct kvm *kvm; | ||
| 1243 | int zapped; | ||
| 1244 | }; | ||
| 1245 | |||
| 1246 | static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | ||
| 1247 | { | ||
| 1248 | struct zap_walker *zap_walk = container_of(walk, struct zap_walker, | ||
| 1249 | walker); | ||
| 1250 | kvm_mmu_zap_page(zap_walk->kvm, sp); | ||
| 1251 | zap_walk->zapped = 1; | ||
| 1252 | return 0; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1256 | { | ||
| 1257 | struct zap_walker walker = { | ||
| 1258 | .walker = { .entry = mmu_zap_fn, }, | ||
| 1259 | .kvm = kvm, | ||
| 1260 | .zapped = 0, | ||
| 1261 | }; | ||
| 1262 | |||
| 1263 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) | ||
| 1264 | return 0; | ||
| 1265 | mmu_unsync_walk(sp, &walker.walker); | ||
| 1266 | return walker.zapped; | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1270 | { | ||
| 1271 | int ret; | ||
| 1272 | ++kvm->stat.mmu_shadow_zapped; | ||
| 1273 | ret = mmu_zap_unsync_children(kvm, sp); | ||
| 1013 | kvm_mmu_page_unlink_children(kvm, sp); | 1274 | kvm_mmu_page_unlink_children(kvm, sp); |
| 1275 | kvm_mmu_unlink_parents(kvm, sp); | ||
| 1276 | kvm_flush_remote_tlbs(kvm); | ||
| 1277 | if (!sp->role.invalid && !sp->role.metaphysical) | ||
| 1278 | unaccount_shadowed(kvm, sp->gfn); | ||
| 1279 | if (sp->unsync) | ||
| 1280 | kvm_unlink_unsync_page(kvm, sp); | ||
| 1014 | if (!sp->root_count) { | 1281 | if (!sp->root_count) { |
| 1015 | if (!sp->role.metaphysical && !sp->role.invalid) | ||
| 1016 | unaccount_shadowed(kvm, sp->gfn); | ||
| 1017 | hlist_del(&sp->hash_link); | 1282 | hlist_del(&sp->hash_link); |
| 1018 | kvm_mmu_free_page(kvm, sp); | 1283 | kvm_mmu_free_page(kvm, sp); |
| 1019 | } else { | 1284 | } else { |
| 1020 | int invalid = sp->role.invalid; | ||
| 1021 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | ||
| 1022 | sp->role.invalid = 1; | 1285 | sp->role.invalid = 1; |
| 1286 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | ||
| 1023 | kvm_reload_remote_mmus(kvm); | 1287 | kvm_reload_remote_mmus(kvm); |
| 1024 | if (!sp->role.metaphysical && !invalid) | ||
| 1025 | unaccount_shadowed(kvm, sp->gfn); | ||
| 1026 | } | 1288 | } |
| 1027 | kvm_mmu_reset_last_pte_updated(kvm); | 1289 | kvm_mmu_reset_last_pte_updated(kvm); |
| 1290 | return ret; | ||
| 1028 | } | 1291 | } |
| 1029 | 1292 | ||
| 1030 | /* | 1293 | /* |
| @@ -1077,8 +1340,9 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 1077 | if (sp->gfn == gfn && !sp->role.metaphysical) { | 1340 | if (sp->gfn == gfn && !sp->role.metaphysical) { |
| 1078 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1341 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
| 1079 | sp->role.word); | 1342 | sp->role.word); |
| 1080 | kvm_mmu_zap_page(kvm, sp); | ||
| 1081 | r = 1; | 1343 | r = 1; |
| 1344 | if (kvm_mmu_zap_page(kvm, sp)) | ||
| 1345 | n = bucket->first; | ||
| 1082 | } | 1346 | } |
| 1083 | return r; | 1347 | return r; |
| 1084 | } | 1348 | } |
| @@ -1101,6 +1365,20 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | |||
| 1101 | __set_bit(slot, &sp->slot_bitmap); | 1365 | __set_bit(slot, &sp->slot_bitmap); |
| 1102 | } | 1366 | } |
| 1103 | 1367 | ||
| 1368 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) | ||
| 1369 | { | ||
| 1370 | int i; | ||
| 1371 | u64 *pt = sp->spt; | ||
| 1372 | |||
| 1373 | if (shadow_trap_nonpresent_pte == shadow_notrap_nonpresent_pte) | ||
| 1374 | return; | ||
| 1375 | |||
| 1376 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||
| 1377 | if (pt[i] == shadow_notrap_nonpresent_pte) | ||
| 1378 | set_shadow_pte(&pt[i], shadow_trap_nonpresent_pte); | ||
| 1379 | } | ||
| 1380 | } | ||
| 1381 | |||
| 1104 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | 1382 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) |
| 1105 | { | 1383 | { |
| 1106 | struct page *page; | 1384 | struct page *page; |
| @@ -1110,51 +1388,60 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 1110 | if (gpa == UNMAPPED_GVA) | 1388 | if (gpa == UNMAPPED_GVA) |
| 1111 | return NULL; | 1389 | return NULL; |
| 1112 | 1390 | ||
| 1113 | down_read(&current->mm->mmap_sem); | ||
| 1114 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1391 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 1115 | up_read(&current->mm->mmap_sem); | ||
| 1116 | 1392 | ||
| 1117 | return page; | 1393 | return page; |
| 1118 | } | 1394 | } |
| 1119 | 1395 | ||
| 1120 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1396 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 1121 | unsigned pt_access, unsigned pte_access, | ||
| 1122 | int user_fault, int write_fault, int dirty, | ||
| 1123 | int *ptwrite, int largepage, gfn_t gfn, | ||
| 1124 | pfn_t pfn, bool speculative) | ||
| 1125 | { | 1397 | { |
| 1126 | u64 spte; | 1398 | unsigned index; |
| 1127 | int was_rmapped = 0; | 1399 | struct hlist_head *bucket; |
| 1128 | int was_writeble = is_writeble_pte(*shadow_pte); | 1400 | struct kvm_mmu_page *s; |
| 1401 | struct hlist_node *node, *n; | ||
| 1129 | 1402 | ||
| 1130 | pgprintk("%s: spte %llx access %x write_fault %d" | 1403 | index = kvm_page_table_hashfn(sp->gfn); |
| 1131 | " user_fault %d gfn %lx\n", | 1404 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 1132 | __func__, *shadow_pte, pt_access, | 1405 | /* don't unsync if pagetable is shadowed with multiple roles */ |
| 1133 | write_fault, user_fault, gfn); | 1406 | hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { |
| 1407 | if (s->gfn != sp->gfn || s->role.metaphysical) | ||
| 1408 | continue; | ||
| 1409 | if (s->role.word != sp->role.word) | ||
| 1410 | return 1; | ||
| 1411 | } | ||
| 1412 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
| 1413 | ++vcpu->kvm->stat.mmu_unsync; | ||
| 1414 | sp->unsync = 1; | ||
| 1415 | mmu_convert_notrap(sp); | ||
| 1416 | return 0; | ||
| 1417 | } | ||
| 1134 | 1418 | ||
| 1135 | if (is_rmap_pte(*shadow_pte)) { | 1419 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
| 1136 | /* | 1420 | bool can_unsync) |
| 1137 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink | 1421 | { |
| 1138 | * the parent of the now unreachable PTE. | 1422 | struct kvm_mmu_page *shadow; |
| 1139 | */ | ||
| 1140 | if (largepage && !is_large_pte(*shadow_pte)) { | ||
| 1141 | struct kvm_mmu_page *child; | ||
| 1142 | u64 pte = *shadow_pte; | ||
| 1143 | 1423 | ||
| 1144 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1424 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); |
| 1145 | mmu_page_remove_parent_pte(child, shadow_pte); | 1425 | if (shadow) { |
| 1146 | } else if (pfn != spte_to_pfn(*shadow_pte)) { | 1426 | if (shadow->role.level != PT_PAGE_TABLE_LEVEL) |
| 1147 | pgprintk("hfn old %lx new %lx\n", | 1427 | return 1; |
| 1148 | spte_to_pfn(*shadow_pte), pfn); | 1428 | if (shadow->unsync) |
| 1149 | rmap_remove(vcpu->kvm, shadow_pte); | 1429 | return 0; |
| 1150 | } else { | 1430 | if (can_unsync && oos_shadow) |
| 1151 | if (largepage) | 1431 | return kvm_unsync_page(vcpu, shadow); |
| 1152 | was_rmapped = is_large_pte(*shadow_pte); | 1432 | return 1; |
| 1153 | else | ||
| 1154 | was_rmapped = 1; | ||
| 1155 | } | ||
| 1156 | } | 1433 | } |
| 1434 | return 0; | ||
| 1435 | } | ||
| 1157 | 1436 | ||
| 1437 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | ||
| 1438 | unsigned pte_access, int user_fault, | ||
| 1439 | int write_fault, int dirty, int largepage, | ||
| 1440 | gfn_t gfn, pfn_t pfn, bool speculative, | ||
| 1441 | bool can_unsync) | ||
| 1442 | { | ||
| 1443 | u64 spte; | ||
| 1444 | int ret = 0; | ||
| 1158 | /* | 1445 | /* |
| 1159 | * We don't set the accessed bit, since we sometimes want to see | 1446 | * We don't set the accessed bit, since we sometimes want to see |
| 1160 | * whether the guest actually used the pte (in order to detect | 1447 | * whether the guest actually used the pte (in order to detect |
| @@ -1162,7 +1449,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
| 1162 | */ | 1449 | */ |
| 1163 | spte = shadow_base_present_pte | shadow_dirty_mask; | 1450 | spte = shadow_base_present_pte | shadow_dirty_mask; |
| 1164 | if (!speculative) | 1451 | if (!speculative) |
| 1165 | pte_access |= PT_ACCESSED_MASK; | 1452 | spte |= shadow_accessed_mask; |
| 1166 | if (!dirty) | 1453 | if (!dirty) |
| 1167 | pte_access &= ~ACC_WRITE_MASK; | 1454 | pte_access &= ~ACC_WRITE_MASK; |
| 1168 | if (pte_access & ACC_EXEC_MASK) | 1455 | if (pte_access & ACC_EXEC_MASK) |
| @@ -1178,35 +1465,82 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
| 1178 | 1465 | ||
| 1179 | if ((pte_access & ACC_WRITE_MASK) | 1466 | if ((pte_access & ACC_WRITE_MASK) |
| 1180 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | 1467 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { |
| 1181 | struct kvm_mmu_page *shadow; | 1468 | |
| 1469 | if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) { | ||
| 1470 | ret = 1; | ||
| 1471 | spte = shadow_trap_nonpresent_pte; | ||
| 1472 | goto set_pte; | ||
| 1473 | } | ||
| 1182 | 1474 | ||
| 1183 | spte |= PT_WRITABLE_MASK; | 1475 | spte |= PT_WRITABLE_MASK; |
| 1184 | 1476 | ||
| 1185 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1477 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
| 1186 | if (shadow || | ||
| 1187 | (largepage && has_wrprotected_page(vcpu->kvm, gfn))) { | ||
| 1188 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 1478 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
| 1189 | __func__, gfn); | 1479 | __func__, gfn); |
| 1480 | ret = 1; | ||
| 1190 | pte_access &= ~ACC_WRITE_MASK; | 1481 | pte_access &= ~ACC_WRITE_MASK; |
| 1191 | if (is_writeble_pte(spte)) { | 1482 | if (is_writeble_pte(spte)) |
| 1192 | spte &= ~PT_WRITABLE_MASK; | 1483 | spte &= ~PT_WRITABLE_MASK; |
| 1193 | kvm_x86_ops->tlb_flush(vcpu); | ||
| 1194 | } | ||
| 1195 | if (write_fault) | ||
| 1196 | *ptwrite = 1; | ||
| 1197 | } | 1484 | } |
| 1198 | } | 1485 | } |
| 1199 | 1486 | ||
| 1200 | if (pte_access & ACC_WRITE_MASK) | 1487 | if (pte_access & ACC_WRITE_MASK) |
| 1201 | mark_page_dirty(vcpu->kvm, gfn); | 1488 | mark_page_dirty(vcpu->kvm, gfn); |
| 1202 | 1489 | ||
| 1203 | pgprintk("%s: setting spte %llx\n", __func__, spte); | 1490 | set_pte: |
| 1204 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", | ||
| 1205 | (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", | ||
| 1206 | (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); | ||
| 1207 | set_shadow_pte(shadow_pte, spte); | 1491 | set_shadow_pte(shadow_pte, spte); |
| 1208 | if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK) | 1492 | return ret; |
| 1209 | && (spte & PT_PRESENT_MASK)) | 1493 | } |
| 1494 | |||
| 1495 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | ||
| 1496 | unsigned pt_access, unsigned pte_access, | ||
| 1497 | int user_fault, int write_fault, int dirty, | ||
| 1498 | int *ptwrite, int largepage, gfn_t gfn, | ||
| 1499 | pfn_t pfn, bool speculative) | ||
| 1500 | { | ||
| 1501 | int was_rmapped = 0; | ||
| 1502 | int was_writeble = is_writeble_pte(*shadow_pte); | ||
| 1503 | |||
| 1504 | pgprintk("%s: spte %llx access %x write_fault %d" | ||
| 1505 | " user_fault %d gfn %lx\n", | ||
| 1506 | __func__, *shadow_pte, pt_access, | ||
| 1507 | write_fault, user_fault, gfn); | ||
| 1508 | |||
| 1509 | if (is_rmap_pte(*shadow_pte)) { | ||
| 1510 | /* | ||
| 1511 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink | ||
| 1512 | * the parent of the now unreachable PTE. | ||
| 1513 | */ | ||
| 1514 | if (largepage && !is_large_pte(*shadow_pte)) { | ||
| 1515 | struct kvm_mmu_page *child; | ||
| 1516 | u64 pte = *shadow_pte; | ||
| 1517 | |||
| 1518 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
| 1519 | mmu_page_remove_parent_pte(child, shadow_pte); | ||
| 1520 | } else if (pfn != spte_to_pfn(*shadow_pte)) { | ||
| 1521 | pgprintk("hfn old %lx new %lx\n", | ||
| 1522 | spte_to_pfn(*shadow_pte), pfn); | ||
| 1523 | rmap_remove(vcpu->kvm, shadow_pte); | ||
| 1524 | } else { | ||
| 1525 | if (largepage) | ||
| 1526 | was_rmapped = is_large_pte(*shadow_pte); | ||
| 1527 | else | ||
| 1528 | was_rmapped = 1; | ||
| 1529 | } | ||
| 1530 | } | ||
| 1531 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, | ||
| 1532 | dirty, largepage, gfn, pfn, speculative, true)) { | ||
| 1533 | if (write_fault) | ||
| 1534 | *ptwrite = 1; | ||
| 1535 | kvm_x86_ops->tlb_flush(vcpu); | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | pgprintk("%s: setting spte %llx\n", __func__, *shadow_pte); | ||
| 1539 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", | ||
| 1540 | is_large_pte(*shadow_pte)? "2MB" : "4kB", | ||
| 1541 | is_present_pte(*shadow_pte)?"RW":"R", gfn, | ||
| 1542 | *shadow_pte, shadow_pte); | ||
| 1543 | if (!was_rmapped && is_large_pte(*shadow_pte)) | ||
| 1210 | ++vcpu->kvm->stat.lpages; | 1544 | ++vcpu->kvm->stat.lpages; |
| 1211 | 1545 | ||
| 1212 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); | 1546 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); |
| @@ -1230,54 +1564,67 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
| 1230 | { | 1564 | { |
| 1231 | } | 1565 | } |
| 1232 | 1566 | ||
| 1233 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | 1567 | struct direct_shadow_walk { |
| 1234 | int largepage, gfn_t gfn, pfn_t pfn, | 1568 | struct kvm_shadow_walk walker; |
| 1235 | int level) | 1569 | pfn_t pfn; |
| 1236 | { | 1570 | int write; |
| 1237 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; | 1571 | int largepage; |
| 1238 | int pt_write = 0; | 1572 | int pt_write; |
| 1239 | 1573 | }; | |
| 1240 | for (; ; level--) { | ||
| 1241 | u32 index = PT64_INDEX(v, level); | ||
| 1242 | u64 *table; | ||
| 1243 | |||
| 1244 | ASSERT(VALID_PAGE(table_addr)); | ||
| 1245 | table = __va(table_addr); | ||
| 1246 | 1574 | ||
| 1247 | if (level == 1) { | 1575 | static int direct_map_entry(struct kvm_shadow_walk *_walk, |
| 1248 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, | 1576 | struct kvm_vcpu *vcpu, |
| 1249 | 0, write, 1, &pt_write, 0, gfn, pfn, false); | 1577 | u64 addr, u64 *sptep, int level) |
| 1250 | return pt_write; | 1578 | { |
| 1251 | } | 1579 | struct direct_shadow_walk *walk = |
| 1580 | container_of(_walk, struct direct_shadow_walk, walker); | ||
| 1581 | struct kvm_mmu_page *sp; | ||
| 1582 | gfn_t pseudo_gfn; | ||
| 1583 | gfn_t gfn = addr >> PAGE_SHIFT; | ||
| 1584 | |||
| 1585 | if (level == PT_PAGE_TABLE_LEVEL | ||
| 1586 | || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { | ||
| 1587 | mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, | ||
| 1588 | 0, walk->write, 1, &walk->pt_write, | ||
| 1589 | walk->largepage, gfn, walk->pfn, false); | ||
| 1590 | ++vcpu->stat.pf_fixed; | ||
| 1591 | return 1; | ||
| 1592 | } | ||
| 1252 | 1593 | ||
| 1253 | if (largepage && level == 2) { | 1594 | if (*sptep == shadow_trap_nonpresent_pte) { |
| 1254 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, | 1595 | pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| 1255 | 0, write, 1, &pt_write, 1, gfn, pfn, false); | 1596 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, |
| 1256 | return pt_write; | 1597 | 1, ACC_ALL, sptep); |
| 1598 | if (!sp) { | ||
| 1599 | pgprintk("nonpaging_map: ENOMEM\n"); | ||
| 1600 | kvm_release_pfn_clean(walk->pfn); | ||
| 1601 | return -ENOMEM; | ||
| 1257 | } | 1602 | } |
| 1258 | 1603 | ||
| 1259 | if (table[index] == shadow_trap_nonpresent_pte) { | 1604 | set_shadow_pte(sptep, |
| 1260 | struct kvm_mmu_page *new_table; | 1605 | __pa(sp->spt) |
| 1261 | gfn_t pseudo_gfn; | 1606 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
| 1262 | 1607 | | shadow_user_mask | shadow_x_mask); | |
| 1263 | pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK) | ||
| 1264 | >> PAGE_SHIFT; | ||
| 1265 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | ||
| 1266 | v, level - 1, | ||
| 1267 | 1, ACC_ALL, &table[index]); | ||
| 1268 | if (!new_table) { | ||
| 1269 | pgprintk("nonpaging_map: ENOMEM\n"); | ||
| 1270 | kvm_release_pfn_clean(pfn); | ||
| 1271 | return -ENOMEM; | ||
| 1272 | } | ||
| 1273 | |||
| 1274 | set_shadow_pte(&table[index], | ||
| 1275 | __pa(new_table->spt) | ||
| 1276 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | ||
| 1277 | | shadow_user_mask | shadow_x_mask); | ||
| 1278 | } | ||
| 1279 | table_addr = table[index] & PT64_BASE_ADDR_MASK; | ||
| 1280 | } | 1608 | } |
| 1609 | return 0; | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | ||
| 1613 | int largepage, gfn_t gfn, pfn_t pfn) | ||
| 1614 | { | ||
| 1615 | int r; | ||
| 1616 | struct direct_shadow_walk walker = { | ||
| 1617 | .walker = { .entry = direct_map_entry, }, | ||
| 1618 | .pfn = pfn, | ||
| 1619 | .largepage = largepage, | ||
| 1620 | .write = write, | ||
| 1621 | .pt_write = 0, | ||
| 1622 | }; | ||
| 1623 | |||
| 1624 | r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT); | ||
| 1625 | if (r < 0) | ||
| 1626 | return r; | ||
| 1627 | return walker.pt_write; | ||
| 1281 | } | 1628 | } |
| 1282 | 1629 | ||
| 1283 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 1630 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
| @@ -1287,16 +1634,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1287 | pfn_t pfn; | 1634 | pfn_t pfn; |
| 1288 | unsigned long mmu_seq; | 1635 | unsigned long mmu_seq; |
| 1289 | 1636 | ||
| 1290 | down_read(&current->mm->mmap_sem); | ||
| 1291 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1637 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
| 1292 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1638 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1293 | largepage = 1; | 1639 | largepage = 1; |
| 1294 | } | 1640 | } |
| 1295 | 1641 | ||
| 1296 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 1642 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 1297 | /* implicit mb(), we'll read before PT lock is unlocked */ | 1643 | smp_rmb(); |
| 1298 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1644 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1299 | up_read(&current->mm->mmap_sem); | ||
| 1300 | 1645 | ||
| 1301 | /* mmio */ | 1646 | /* mmio */ |
| 1302 | if (is_error_pfn(pfn)) { | 1647 | if (is_error_pfn(pfn)) { |
| @@ -1308,8 +1653,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1308 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 1653 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
| 1309 | goto out_unlock; | 1654 | goto out_unlock; |
| 1310 | kvm_mmu_free_some_pages(vcpu); | 1655 | kvm_mmu_free_some_pages(vcpu); |
| 1311 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, | 1656 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn); |
| 1312 | PT32E_ROOT_LEVEL); | ||
| 1313 | spin_unlock(&vcpu->kvm->mmu_lock); | 1657 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1314 | 1658 | ||
| 1315 | 1659 | ||
| @@ -1405,6 +1749,37 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 1405 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 1749 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
| 1406 | } | 1750 | } |
| 1407 | 1751 | ||
| 1752 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) | ||
| 1753 | { | ||
| 1754 | int i; | ||
| 1755 | struct kvm_mmu_page *sp; | ||
| 1756 | |||
| 1757 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
| 1758 | return; | ||
| 1759 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | ||
| 1760 | hpa_t root = vcpu->arch.mmu.root_hpa; | ||
| 1761 | sp = page_header(root); | ||
| 1762 | mmu_sync_children(vcpu, sp); | ||
| 1763 | return; | ||
| 1764 | } | ||
| 1765 | for (i = 0; i < 4; ++i) { | ||
| 1766 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | ||
| 1767 | |||
| 1768 | if (root) { | ||
| 1769 | root &= PT64_BASE_ADDR_MASK; | ||
| 1770 | sp = page_header(root); | ||
| 1771 | mmu_sync_children(vcpu, sp); | ||
| 1772 | } | ||
| 1773 | } | ||
| 1774 | } | ||
| 1775 | |||
| 1776 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | ||
| 1777 | { | ||
| 1778 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 1779 | mmu_sync_roots(vcpu); | ||
| 1780 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 1781 | } | ||
| 1782 | |||
| 1408 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 1783 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
| 1409 | { | 1784 | { |
| 1410 | return vaddr; | 1785 | return vaddr; |
| @@ -1446,15 +1821,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1446 | if (r) | 1821 | if (r) |
| 1447 | return r; | 1822 | return r; |
| 1448 | 1823 | ||
| 1449 | down_read(&current->mm->mmap_sem); | ||
| 1450 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1824 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
| 1451 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1825 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1452 | largepage = 1; | 1826 | largepage = 1; |
| 1453 | } | 1827 | } |
| 1454 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 1828 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 1455 | /* implicit mb(), we'll read before PT lock is unlocked */ | 1829 | smp_rmb(); |
| 1456 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1830 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1457 | up_read(&current->mm->mmap_sem); | ||
| 1458 | if (is_error_pfn(pfn)) { | 1831 | if (is_error_pfn(pfn)) { |
| 1459 | kvm_release_pfn_clean(pfn); | 1832 | kvm_release_pfn_clean(pfn); |
| 1460 | return 1; | 1833 | return 1; |
| @@ -1464,7 +1837,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1464 | goto out_unlock; | 1837 | goto out_unlock; |
| 1465 | kvm_mmu_free_some_pages(vcpu); | 1838 | kvm_mmu_free_some_pages(vcpu); |
| 1466 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1839 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
| 1467 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); | 1840 | largepage, gfn, pfn); |
| 1468 | spin_unlock(&vcpu->kvm->mmu_lock); | 1841 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1469 | 1842 | ||
| 1470 | return r; | 1843 | return r; |
| @@ -1489,6 +1862,8 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
| 1489 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 1862 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
| 1490 | context->free = nonpaging_free; | 1863 | context->free = nonpaging_free; |
| 1491 | context->prefetch_page = nonpaging_prefetch_page; | 1864 | context->prefetch_page = nonpaging_prefetch_page; |
| 1865 | context->sync_page = nonpaging_sync_page; | ||
| 1866 | context->invlpg = nonpaging_invlpg; | ||
| 1492 | context->root_level = 0; | 1867 | context->root_level = 0; |
| 1493 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 1868 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
| 1494 | context->root_hpa = INVALID_PAGE; | 1869 | context->root_hpa = INVALID_PAGE; |
| @@ -1536,6 +1911,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | |||
| 1536 | context->page_fault = paging64_page_fault; | 1911 | context->page_fault = paging64_page_fault; |
| 1537 | context->gva_to_gpa = paging64_gva_to_gpa; | 1912 | context->gva_to_gpa = paging64_gva_to_gpa; |
| 1538 | context->prefetch_page = paging64_prefetch_page; | 1913 | context->prefetch_page = paging64_prefetch_page; |
| 1914 | context->sync_page = paging64_sync_page; | ||
| 1915 | context->invlpg = paging64_invlpg; | ||
| 1539 | context->free = paging_free; | 1916 | context->free = paging_free; |
| 1540 | context->root_level = level; | 1917 | context->root_level = level; |
| 1541 | context->shadow_root_level = level; | 1918 | context->shadow_root_level = level; |
| @@ -1557,6 +1934,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
| 1557 | context->gva_to_gpa = paging32_gva_to_gpa; | 1934 | context->gva_to_gpa = paging32_gva_to_gpa; |
| 1558 | context->free = paging_free; | 1935 | context->free = paging_free; |
| 1559 | context->prefetch_page = paging32_prefetch_page; | 1936 | context->prefetch_page = paging32_prefetch_page; |
| 1937 | context->sync_page = paging32_sync_page; | ||
| 1938 | context->invlpg = paging32_invlpg; | ||
| 1560 | context->root_level = PT32_ROOT_LEVEL; | 1939 | context->root_level = PT32_ROOT_LEVEL; |
| 1561 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 1940 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
| 1562 | context->root_hpa = INVALID_PAGE; | 1941 | context->root_hpa = INVALID_PAGE; |
| @@ -1576,6 +1955,8 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
| 1576 | context->page_fault = tdp_page_fault; | 1955 | context->page_fault = tdp_page_fault; |
| 1577 | context->free = nonpaging_free; | 1956 | context->free = nonpaging_free; |
| 1578 | context->prefetch_page = nonpaging_prefetch_page; | 1957 | context->prefetch_page = nonpaging_prefetch_page; |
| 1958 | context->sync_page = nonpaging_sync_page; | ||
| 1959 | context->invlpg = nonpaging_invlpg; | ||
| 1579 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 1960 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
| 1580 | context->root_hpa = INVALID_PAGE; | 1961 | context->root_hpa = INVALID_PAGE; |
| 1581 | 1962 | ||
| @@ -1647,6 +2028,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
| 1647 | spin_lock(&vcpu->kvm->mmu_lock); | 2028 | spin_lock(&vcpu->kvm->mmu_lock); |
| 1648 | kvm_mmu_free_some_pages(vcpu); | 2029 | kvm_mmu_free_some_pages(vcpu); |
| 1649 | mmu_alloc_roots(vcpu); | 2030 | mmu_alloc_roots(vcpu); |
| 2031 | mmu_sync_roots(vcpu); | ||
| 1650 | spin_unlock(&vcpu->kvm->mmu_lock); | 2032 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1651 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 2033 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
| 1652 | kvm_mmu_flush_tlb(vcpu); | 2034 | kvm_mmu_flush_tlb(vcpu); |
| @@ -1767,15 +2149,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1767 | return; | 2149 | return; |
| 1768 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2150 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| 1769 | 2151 | ||
| 1770 | down_read(&current->mm->mmap_sem); | ||
| 1771 | if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { | 2152 | if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { |
| 1772 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 2153 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1773 | vcpu->arch.update_pte.largepage = 1; | 2154 | vcpu->arch.update_pte.largepage = 1; |
| 1774 | } | 2155 | } |
| 1775 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2156 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 1776 | /* implicit mb(), we'll read before PT lock is unlocked */ | 2157 | smp_rmb(); |
| 1777 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2158 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1778 | up_read(&current->mm->mmap_sem); | ||
| 1779 | 2159 | ||
| 1780 | if (is_error_pfn(pfn)) { | 2160 | if (is_error_pfn(pfn)) { |
| 1781 | kvm_release_pfn_clean(pfn); | 2161 | kvm_release_pfn_clean(pfn); |
| @@ -1837,7 +2217,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1837 | index = kvm_page_table_hashfn(gfn); | 2217 | index = kvm_page_table_hashfn(gfn); |
| 1838 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2218 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 1839 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2219 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
| 1840 | if (sp->gfn != gfn || sp->role.metaphysical) | 2220 | if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) |
| 1841 | continue; | 2221 | continue; |
| 1842 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2222 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; |
| 1843 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2223 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
| @@ -1855,7 +2235,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1855 | */ | 2235 | */ |
| 1856 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2236 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
| 1857 | gpa, bytes, sp->role.word); | 2237 | gpa, bytes, sp->role.word); |
| 1858 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2238 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
| 2239 | n = bucket->first; | ||
| 1859 | ++vcpu->kvm->stat.mmu_flooded; | 2240 | ++vcpu->kvm->stat.mmu_flooded; |
| 1860 | continue; | 2241 | continue; |
| 1861 | } | 2242 | } |
| @@ -1969,6 +2350,16 @@ out: | |||
| 1969 | } | 2350 | } |
| 1970 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | 2351 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |
| 1971 | 2352 | ||
| 2353 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 2354 | { | ||
| 2355 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2356 | vcpu->arch.mmu.invlpg(vcpu, gva); | ||
| 2357 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2358 | kvm_mmu_flush_tlb(vcpu); | ||
| 2359 | ++vcpu->stat.invlpg; | ||
| 2360 | } | ||
| 2361 | EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); | ||
| 2362 | |||
| 1972 | void kvm_enable_tdp(void) | 2363 | void kvm_enable_tdp(void) |
| 1973 | { | 2364 | { |
| 1974 | tdp_enabled = true; | 2365 | tdp_enabled = true; |
| @@ -2055,6 +2446,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2055 | { | 2446 | { |
| 2056 | struct kvm_mmu_page *sp; | 2447 | struct kvm_mmu_page *sp; |
| 2057 | 2448 | ||
| 2449 | spin_lock(&kvm->mmu_lock); | ||
| 2058 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 2450 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
| 2059 | int i; | 2451 | int i; |
| 2060 | u64 *pt; | 2452 | u64 *pt; |
| @@ -2068,6 +2460,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2068 | if (pt[i] & PT_WRITABLE_MASK) | 2460 | if (pt[i] & PT_WRITABLE_MASK) |
| 2069 | pt[i] &= ~PT_WRITABLE_MASK; | 2461 | pt[i] &= ~PT_WRITABLE_MASK; |
| 2070 | } | 2462 | } |
| 2463 | kvm_flush_remote_tlbs(kvm); | ||
| 2464 | spin_unlock(&kvm->mmu_lock); | ||
| 2071 | } | 2465 | } |
| 2072 | 2466 | ||
| 2073 | void kvm_mmu_zap_all(struct kvm *kvm) | 2467 | void kvm_mmu_zap_all(struct kvm *kvm) |
| @@ -2076,7 +2470,9 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
| 2076 | 2470 | ||
| 2077 | spin_lock(&kvm->mmu_lock); | 2471 | spin_lock(&kvm->mmu_lock); |
| 2078 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2472 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
| 2079 | kvm_mmu_zap_page(kvm, sp); | 2473 | if (kvm_mmu_zap_page(kvm, sp)) |
| 2474 | node = container_of(kvm->arch.active_mmu_pages.next, | ||
| 2475 | struct kvm_mmu_page, link); | ||
| 2080 | spin_unlock(&kvm->mmu_lock); | 2476 | spin_unlock(&kvm->mmu_lock); |
| 2081 | 2477 | ||
| 2082 | kvm_flush_remote_tlbs(kvm); | 2478 | kvm_flush_remote_tlbs(kvm); |
| @@ -2291,18 +2687,18 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | |||
| 2291 | gpa_t addr, unsigned long *ret) | 2687 | gpa_t addr, unsigned long *ret) |
| 2292 | { | 2688 | { |
| 2293 | int r; | 2689 | int r; |
| 2294 | struct kvm_pv_mmu_op_buffer buffer; | 2690 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; |
| 2295 | 2691 | ||
| 2296 | buffer.ptr = buffer.buf; | 2692 | buffer->ptr = buffer->buf; |
| 2297 | buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf); | 2693 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); |
| 2298 | buffer.processed = 0; | 2694 | buffer->processed = 0; |
| 2299 | 2695 | ||
| 2300 | r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len); | 2696 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); |
| 2301 | if (r) | 2697 | if (r) |
| 2302 | goto out; | 2698 | goto out; |
| 2303 | 2699 | ||
| 2304 | while (buffer.len) { | 2700 | while (buffer->len) { |
| 2305 | r = kvm_pv_mmu_op_one(vcpu, &buffer); | 2701 | r = kvm_pv_mmu_op_one(vcpu, buffer); |
| 2306 | if (r < 0) | 2702 | if (r < 0) |
| 2307 | goto out; | 2703 | goto out; |
| 2308 | if (r == 0) | 2704 | if (r == 0) |
| @@ -2311,7 +2707,7 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | |||
| 2311 | 2707 | ||
| 2312 | r = 1; | 2708 | r = 1; |
| 2313 | out: | 2709 | out: |
| 2314 | *ret = buffer.processed; | 2710 | *ret = buffer->processed; |
| 2315 | return r; | 2711 | return r; |
| 2316 | } | 2712 | } |
| 2317 | 2713 | ||
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4a814bff21f2..613ec9aa674a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -25,11 +25,11 @@ | |||
| 25 | #if PTTYPE == 64 | 25 | #if PTTYPE == 64 |
| 26 | #define pt_element_t u64 | 26 | #define pt_element_t u64 |
| 27 | #define guest_walker guest_walker64 | 27 | #define guest_walker guest_walker64 |
| 28 | #define shadow_walker shadow_walker64 | ||
| 28 | #define FNAME(name) paging##64_##name | 29 | #define FNAME(name) paging##64_##name |
| 29 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK | 30 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK |
| 30 | #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK | 31 | #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK |
| 31 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 32 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
| 32 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
| 33 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | 33 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) |
| 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
| 35 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
| @@ -42,11 +42,11 @@ | |||
| 42 | #elif PTTYPE == 32 | 42 | #elif PTTYPE == 32 |
| 43 | #define pt_element_t u32 | 43 | #define pt_element_t u32 |
| 44 | #define guest_walker guest_walker32 | 44 | #define guest_walker guest_walker32 |
| 45 | #define shadow_walker shadow_walker32 | ||
| 45 | #define FNAME(name) paging##32_##name | 46 | #define FNAME(name) paging##32_##name |
| 46 | #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK | 47 | #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK |
| 47 | #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK | 48 | #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK |
| 48 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
| 49 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
| 50 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | 50 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) |
| 51 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 51 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
| 52 | #define PT_MAX_FULL_LEVELS 2 | 52 | #define PT_MAX_FULL_LEVELS 2 |
| @@ -73,6 +73,17 @@ struct guest_walker { | |||
| 73 | u32 error_code; | 73 | u32 error_code; |
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | struct shadow_walker { | ||
| 77 | struct kvm_shadow_walk walker; | ||
| 78 | struct guest_walker *guest_walker; | ||
| 79 | int user_fault; | ||
| 80 | int write_fault; | ||
| 81 | int largepage; | ||
| 82 | int *ptwrite; | ||
| 83 | pfn_t pfn; | ||
| 84 | u64 *sptep; | ||
| 85 | }; | ||
| 86 | |||
| 76 | static gfn_t gpte_to_gfn(pt_element_t gpte) | 87 | static gfn_t gpte_to_gfn(pt_element_t gpte) |
| 77 | { | 88 | { |
| 78 | return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; | 89 | return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| @@ -91,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, | |||
| 91 | pt_element_t *table; | 102 | pt_element_t *table; |
| 92 | struct page *page; | 103 | struct page *page; |
| 93 | 104 | ||
| 94 | down_read(&current->mm->mmap_sem); | ||
| 95 | page = gfn_to_page(kvm, table_gfn); | 105 | page = gfn_to_page(kvm, table_gfn); |
| 96 | up_read(&current->mm->mmap_sem); | ||
| 97 | 106 | ||
| 98 | table = kmap_atomic(page, KM_USER0); | 107 | table = kmap_atomic(page, KM_USER0); |
| 99 | |||
| 100 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | 108 | ret = CMPXCHG(&table[index], orig_pte, new_pte); |
| 101 | |||
| 102 | kunmap_atomic(table, KM_USER0); | 109 | kunmap_atomic(table, KM_USER0); |
| 103 | 110 | ||
| 104 | kvm_release_page_dirty(page); | 111 | kvm_release_page_dirty(page); |
| @@ -274,86 +281,89 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 274 | /* | 281 | /* |
| 275 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 282 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
| 276 | */ | 283 | */ |
| 277 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 284 | static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, |
| 278 | struct guest_walker *walker, | 285 | struct kvm_vcpu *vcpu, u64 addr, |
| 279 | int user_fault, int write_fault, int largepage, | 286 | u64 *sptep, int level) |
| 280 | int *ptwrite, pfn_t pfn) | ||
| 281 | { | 287 | { |
| 282 | hpa_t shadow_addr; | 288 | struct shadow_walker *sw = |
| 283 | int level; | 289 | container_of(_sw, struct shadow_walker, walker); |
| 284 | u64 *shadow_ent; | 290 | struct guest_walker *gw = sw->guest_walker; |
| 285 | unsigned access = walker->pt_access; | 291 | unsigned access = gw->pt_access; |
| 286 | 292 | struct kvm_mmu_page *shadow_page; | |
| 287 | if (!is_present_pte(walker->ptes[walker->level - 1])) | 293 | u64 spte; |
| 288 | return NULL; | 294 | int metaphysical; |
| 289 | 295 | gfn_t table_gfn; | |
| 290 | shadow_addr = vcpu->arch.mmu.root_hpa; | 296 | int r; |
| 291 | level = vcpu->arch.mmu.shadow_root_level; | 297 | pt_element_t curr_pte; |
| 292 | if (level == PT32E_ROOT_LEVEL) { | 298 | |
| 293 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | 299 | if (level == PT_PAGE_TABLE_LEVEL |
| 294 | shadow_addr &= PT64_BASE_ADDR_MASK; | 300 | || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { |
| 295 | --level; | 301 | mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, |
| 302 | sw->user_fault, sw->write_fault, | ||
| 303 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | ||
| 304 | sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, | ||
| 305 | false); | ||
| 306 | sw->sptep = sptep; | ||
| 307 | return 1; | ||
| 296 | } | 308 | } |
| 297 | 309 | ||
| 298 | for (; ; level--) { | 310 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) |
| 299 | u32 index = SHADOW_PT_INDEX(addr, level); | 311 | return 0; |
| 300 | struct kvm_mmu_page *shadow_page; | ||
| 301 | u64 shadow_pte; | ||
| 302 | int metaphysical; | ||
| 303 | gfn_t table_gfn; | ||
| 304 | |||
| 305 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | ||
| 306 | if (level == PT_PAGE_TABLE_LEVEL) | ||
| 307 | break; | ||
| 308 | |||
| 309 | if (largepage && level == PT_DIRECTORY_LEVEL) | ||
| 310 | break; | ||
| 311 | 312 | ||
| 312 | if (is_shadow_present_pte(*shadow_ent) | 313 | if (is_large_pte(*sptep)) { |
| 313 | && !is_large_pte(*shadow_ent)) { | 314 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); |
| 314 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | 315 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 315 | continue; | 316 | rmap_remove(vcpu->kvm, sptep); |
| 316 | } | 317 | } |
| 317 | 318 | ||
| 318 | if (is_large_pte(*shadow_ent)) | 319 | if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { |
| 319 | rmap_remove(vcpu->kvm, shadow_ent); | 320 | metaphysical = 1; |
| 320 | 321 | if (!is_dirty_pte(gw->ptes[level - 1])) | |
| 321 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 322 | access &= ~ACC_WRITE_MASK; |
| 322 | && walker->level == PT_DIRECTORY_LEVEL) { | 323 | table_gfn = gpte_to_gfn(gw->ptes[level - 1]); |
| 323 | metaphysical = 1; | 324 | } else { |
| 324 | if (!is_dirty_pte(walker->ptes[level - 1])) | 325 | metaphysical = 0; |
| 325 | access &= ~ACC_WRITE_MASK; | 326 | table_gfn = gw->table_gfn[level - 2]; |
| 326 | table_gfn = gpte_to_gfn(walker->ptes[level - 1]); | 327 | } |
| 327 | } else { | 328 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1, |
| 328 | metaphysical = 0; | 329 | metaphysical, access, sptep); |
| 329 | table_gfn = walker->table_gfn[level - 2]; | 330 | if (!metaphysical) { |
| 330 | } | 331 | r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], |
| 331 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 332 | &curr_pte, sizeof(curr_pte)); |
| 332 | metaphysical, access, | 333 | if (r || curr_pte != gw->ptes[level - 2]) { |
| 333 | shadow_ent); | 334 | kvm_release_pfn_clean(sw->pfn); |
| 334 | if (!metaphysical) { | 335 | sw->sptep = NULL; |
| 335 | int r; | 336 | return 1; |
| 336 | pt_element_t curr_pte; | ||
| 337 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
| 338 | walker->pte_gpa[level - 2], | ||
| 339 | &curr_pte, sizeof(curr_pte)); | ||
| 340 | if (r || curr_pte != walker->ptes[level - 2]) { | ||
| 341 | kvm_release_pfn_clean(pfn); | ||
| 342 | return NULL; | ||
| 343 | } | ||
| 344 | } | 337 | } |
| 345 | shadow_addr = __pa(shadow_page->spt); | ||
| 346 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | ||
| 347 | | PT_WRITABLE_MASK | PT_USER_MASK; | ||
| 348 | set_shadow_pte(shadow_ent, shadow_pte); | ||
| 349 | } | 338 | } |
| 350 | 339 | ||
| 351 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, | 340 | spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK |
| 352 | user_fault, write_fault, | 341 | | PT_WRITABLE_MASK | PT_USER_MASK; |
| 353 | walker->ptes[walker->level-1] & PT_DIRTY_MASK, | 342 | *sptep = spte; |
| 354 | ptwrite, largepage, walker->gfn, pfn, false); | 343 | return 0; |
| 344 | } | ||
| 345 | |||
| 346 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | ||
| 347 | struct guest_walker *guest_walker, | ||
| 348 | int user_fault, int write_fault, int largepage, | ||
| 349 | int *ptwrite, pfn_t pfn) | ||
| 350 | { | ||
| 351 | struct shadow_walker walker = { | ||
| 352 | .walker = { .entry = FNAME(shadow_walk_entry), }, | ||
| 353 | .guest_walker = guest_walker, | ||
| 354 | .user_fault = user_fault, | ||
| 355 | .write_fault = write_fault, | ||
| 356 | .largepage = largepage, | ||
| 357 | .ptwrite = ptwrite, | ||
| 358 | .pfn = pfn, | ||
| 359 | }; | ||
| 360 | |||
| 361 | if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) | ||
| 362 | return NULL; | ||
| 363 | |||
| 364 | walk_shadow(&walker.walker, vcpu, addr); | ||
| 355 | 365 | ||
| 356 | return shadow_ent; | 366 | return walker.sptep; |
| 357 | } | 367 | } |
| 358 | 368 | ||
| 359 | /* | 369 | /* |
| @@ -407,7 +417,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 407 | return 0; | 417 | return 0; |
| 408 | } | 418 | } |
| 409 | 419 | ||
| 410 | down_read(&current->mm->mmap_sem); | ||
| 411 | if (walker.level == PT_DIRECTORY_LEVEL) { | 420 | if (walker.level == PT_DIRECTORY_LEVEL) { |
| 412 | gfn_t large_gfn; | 421 | gfn_t large_gfn; |
| 413 | large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); | 422 | large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); |
| @@ -417,9 +426,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 417 | } | 426 | } |
| 418 | } | 427 | } |
| 419 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 428 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 420 | /* implicit mb(), we'll read before PT lock is unlocked */ | 429 | smp_rmb(); |
| 421 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 430 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
| 422 | up_read(&current->mm->mmap_sem); | ||
| 423 | 431 | ||
| 424 | /* mmio */ | 432 | /* mmio */ |
| 425 | if (is_error_pfn(pfn)) { | 433 | if (is_error_pfn(pfn)) { |
| @@ -453,6 +461,31 @@ out_unlock: | |||
| 453 | return 0; | 461 | return 0; |
| 454 | } | 462 | } |
| 455 | 463 | ||
| 464 | static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, | ||
| 465 | struct kvm_vcpu *vcpu, u64 addr, | ||
| 466 | u64 *sptep, int level) | ||
| 467 | { | ||
| 468 | |||
| 469 | if (level == PT_PAGE_TABLE_LEVEL) { | ||
| 470 | if (is_shadow_present_pte(*sptep)) | ||
| 471 | rmap_remove(vcpu->kvm, sptep); | ||
| 472 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); | ||
| 473 | return 1; | ||
| 474 | } | ||
| 475 | if (!is_shadow_present_pte(*sptep)) | ||
| 476 | return 1; | ||
| 477 | return 0; | ||
| 478 | } | ||
| 479 | |||
| 480 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 481 | { | ||
| 482 | struct shadow_walker walker = { | ||
| 483 | .walker = { .entry = FNAME(shadow_invlpg_entry), }, | ||
| 484 | }; | ||
| 485 | |||
| 486 | walk_shadow(&walker.walker, vcpu, gva); | ||
| 487 | } | ||
| 488 | |||
| 456 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 489 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
| 457 | { | 490 | { |
| 458 | struct guest_walker walker; | 491 | struct guest_walker walker; |
| @@ -499,12 +532,66 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
| 499 | } | 532 | } |
| 500 | } | 533 | } |
| 501 | 534 | ||
| 535 | /* | ||
| 536 | * Using the cached information from sp->gfns is safe because: | ||
| 537 | * - The spte has a reference to the struct page, so the pfn for a given gfn | ||
| 538 | * can't change unless all sptes pointing to it are nuked first. | ||
| 539 | * - Alias changes zap the entire shadow cache. | ||
| 540 | */ | ||
| 541 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 542 | { | ||
| 543 | int i, offset, nr_present; | ||
| 544 | |||
| 545 | offset = nr_present = 0; | ||
| 546 | |||
| 547 | if (PTTYPE == 32) | ||
| 548 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
| 549 | |||
| 550 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | ||
| 551 | unsigned pte_access; | ||
| 552 | pt_element_t gpte; | ||
| 553 | gpa_t pte_gpa; | ||
| 554 | gfn_t gfn = sp->gfns[i]; | ||
| 555 | |||
| 556 | if (!is_shadow_present_pte(sp->spt[i])) | ||
| 557 | continue; | ||
| 558 | |||
| 559 | pte_gpa = gfn_to_gpa(sp->gfn); | ||
| 560 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
| 561 | |||
| 562 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
| 563 | sizeof(pt_element_t))) | ||
| 564 | return -EINVAL; | ||
| 565 | |||
| 566 | if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) || | ||
| 567 | !(gpte & PT_ACCESSED_MASK)) { | ||
| 568 | u64 nonpresent; | ||
| 569 | |||
| 570 | rmap_remove(vcpu->kvm, &sp->spt[i]); | ||
| 571 | if (is_present_pte(gpte)) | ||
| 572 | nonpresent = shadow_trap_nonpresent_pte; | ||
| 573 | else | ||
| 574 | nonpresent = shadow_notrap_nonpresent_pte; | ||
| 575 | set_shadow_pte(&sp->spt[i], nonpresent); | ||
| 576 | continue; | ||
| 577 | } | ||
| 578 | |||
| 579 | nr_present++; | ||
| 580 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | ||
| 581 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | ||
| 582 | is_dirty_pte(gpte), 0, gfn, | ||
| 583 | spte_to_pfn(sp->spt[i]), true, false); | ||
| 584 | } | ||
| 585 | |||
| 586 | return !nr_present; | ||
| 587 | } | ||
| 588 | |||
| 502 | #undef pt_element_t | 589 | #undef pt_element_t |
| 503 | #undef guest_walker | 590 | #undef guest_walker |
| 591 | #undef shadow_walker | ||
| 504 | #undef FNAME | 592 | #undef FNAME |
| 505 | #undef PT_BASE_ADDR_MASK | 593 | #undef PT_BASE_ADDR_MASK |
| 506 | #undef PT_INDEX | 594 | #undef PT_INDEX |
| 507 | #undef SHADOW_PT_INDEX | ||
| 508 | #undef PT_LEVEL_MASK | 595 | #undef PT_LEVEL_MASK |
| 509 | #undef PT_DIR_BASE_ADDR_MASK | 596 | #undef PT_DIR_BASE_ADDR_MASK |
| 510 | #undef PT_LEVEL_BITS | 597 | #undef PT_LEVEL_BITS |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8233b86c778c..9c4ce657d963 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "kvm_svm.h" | 18 | #include "kvm_svm.h" |
| 19 | #include "irq.h" | 19 | #include "irq.h" |
| 20 | #include "mmu.h" | 20 | #include "mmu.h" |
| 21 | #include "kvm_cache_regs.h" | ||
| 21 | 22 | ||
| 22 | #include <linux/module.h> | 23 | #include <linux/module.h> |
| 23 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
| @@ -35,10 +36,6 @@ MODULE_LICENSE("GPL"); | |||
| 35 | #define IOPM_ALLOC_ORDER 2 | 36 | #define IOPM_ALLOC_ORDER 2 |
| 36 | #define MSRPM_ALLOC_ORDER 1 | 37 | #define MSRPM_ALLOC_ORDER 1 |
| 37 | 38 | ||
| 38 | #define DB_VECTOR 1 | ||
| 39 | #define UD_VECTOR 6 | ||
| 40 | #define GP_VECTOR 13 | ||
| 41 | |||
| 42 | #define DR7_GD_MASK (1 << 13) | 39 | #define DR7_GD_MASK (1 << 13) |
| 43 | #define DR6_BD_MASK (1 << 13) | 40 | #define DR6_BD_MASK (1 << 13) |
| 44 | 41 | ||
| @@ -47,7 +44,7 @@ MODULE_LICENSE("GPL"); | |||
| 47 | 44 | ||
| 48 | #define SVM_FEATURE_NPT (1 << 0) | 45 | #define SVM_FEATURE_NPT (1 << 0) |
| 49 | #define SVM_FEATURE_LBRV (1 << 1) | 46 | #define SVM_FEATURE_LBRV (1 << 1) |
| 50 | #define SVM_DEATURE_SVML (1 << 2) | 47 | #define SVM_FEATURE_SVML (1 << 2) |
| 51 | 48 | ||
| 52 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 49 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
| 53 | 50 | ||
| @@ -236,13 +233,11 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 236 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 233 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
| 237 | return; | 234 | return; |
| 238 | } | 235 | } |
| 239 | if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) | 236 | if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) |
| 240 | printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", | 237 | printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", |
| 241 | __func__, | 238 | __func__, kvm_rip_read(vcpu), svm->next_rip); |
| 242 | svm->vmcb->save.rip, | ||
| 243 | svm->next_rip); | ||
| 244 | 239 | ||
| 245 | vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip; | 240 | kvm_rip_write(vcpu, svm->next_rip); |
| 246 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | 241 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; |
| 247 | 242 | ||
| 248 | vcpu->arch.interrupt_window_open = 1; | 243 | vcpu->arch.interrupt_window_open = 1; |
| @@ -530,6 +525,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 530 | (1ULL << INTERCEPT_CPUID) | | 525 | (1ULL << INTERCEPT_CPUID) | |
| 531 | (1ULL << INTERCEPT_INVD) | | 526 | (1ULL << INTERCEPT_INVD) | |
| 532 | (1ULL << INTERCEPT_HLT) | | 527 | (1ULL << INTERCEPT_HLT) | |
| 528 | (1ULL << INTERCEPT_INVLPG) | | ||
| 533 | (1ULL << INTERCEPT_INVLPGA) | | 529 | (1ULL << INTERCEPT_INVLPGA) | |
| 534 | (1ULL << INTERCEPT_IOIO_PROT) | | 530 | (1ULL << INTERCEPT_IOIO_PROT) | |
| 535 | (1ULL << INTERCEPT_MSR_PROT) | | 531 | (1ULL << INTERCEPT_MSR_PROT) | |
| @@ -581,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 581 | save->dr7 = 0x400; | 577 | save->dr7 = 0x400; |
| 582 | save->rflags = 2; | 578 | save->rflags = 2; |
| 583 | save->rip = 0x0000fff0; | 579 | save->rip = 0x0000fff0; |
| 580 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | ||
| 584 | 581 | ||
| 585 | /* | 582 | /* |
| 586 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 583 | * cr0 val on cpu init should be 0x60000010, we enable cpu |
| @@ -593,7 +590,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 593 | if (npt_enabled) { | 590 | if (npt_enabled) { |
| 594 | /* Setup VMCB for Nested Paging */ | 591 | /* Setup VMCB for Nested Paging */ |
| 595 | control->nested_ctl = 1; | 592 | control->nested_ctl = 1; |
| 596 | control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH); | 593 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | |
| 594 | (1ULL << INTERCEPT_INVLPG)); | ||
| 597 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 595 | control->intercept_exceptions &= ~(1 << PF_VECTOR); |
| 598 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| | 596 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| |
| 599 | INTERCEPT_CR3_MASK); | 597 | INTERCEPT_CR3_MASK); |
| @@ -615,10 +613,12 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 615 | init_vmcb(svm); | 613 | init_vmcb(svm); |
| 616 | 614 | ||
| 617 | if (vcpu->vcpu_id != 0) { | 615 | if (vcpu->vcpu_id != 0) { |
| 618 | svm->vmcb->save.rip = 0; | 616 | kvm_rip_write(vcpu, 0); |
| 619 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; | 617 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; |
| 620 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; | 618 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; |
| 621 | } | 619 | } |
| 620 | vcpu->arch.regs_avail = ~0; | ||
| 621 | vcpu->arch.regs_dirty = ~0; | ||
| 622 | 622 | ||
| 623 | return 0; | 623 | return 0; |
| 624 | } | 624 | } |
| @@ -721,23 +721,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 721 | rdtscll(vcpu->arch.host_tsc); | 721 | rdtscll(vcpu->arch.host_tsc); |
| 722 | } | 722 | } |
| 723 | 723 | ||
| 724 | static void svm_cache_regs(struct kvm_vcpu *vcpu) | ||
| 725 | { | ||
| 726 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 727 | |||
| 728 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
| 729 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
| 730 | vcpu->arch.rip = svm->vmcb->save.rip; | ||
| 731 | } | ||
| 732 | |||
| 733 | static void svm_decache_regs(struct kvm_vcpu *vcpu) | ||
| 734 | { | ||
| 735 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 736 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 737 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 738 | svm->vmcb->save.rip = vcpu->arch.rip; | ||
| 739 | } | ||
| 740 | |||
| 741 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 724 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
| 742 | { | 725 | { |
| 743 | return to_svm(vcpu)->vmcb->save.rflags; | 726 | return to_svm(vcpu)->vmcb->save.rflags; |
| @@ -1040,7 +1023,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1040 | if (npt_enabled) | 1023 | if (npt_enabled) |
| 1041 | svm_flush_tlb(&svm->vcpu); | 1024 | svm_flush_tlb(&svm->vcpu); |
| 1042 | 1025 | ||
| 1043 | if (event_injection) | 1026 | if (!npt_enabled && event_injection) |
| 1044 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | 1027 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); |
| 1045 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1028 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
| 1046 | } | 1029 | } |
| @@ -1139,14 +1122,14 @@ static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1139 | 1122 | ||
| 1140 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1123 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1141 | { | 1124 | { |
| 1142 | svm->next_rip = svm->vmcb->save.rip + 1; | 1125 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
| 1143 | skip_emulated_instruction(&svm->vcpu); | 1126 | skip_emulated_instruction(&svm->vcpu); |
| 1144 | return kvm_emulate_halt(&svm->vcpu); | 1127 | return kvm_emulate_halt(&svm->vcpu); |
| 1145 | } | 1128 | } |
| 1146 | 1129 | ||
| 1147 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1130 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1148 | { | 1131 | { |
| 1149 | svm->next_rip = svm->vmcb->save.rip + 3; | 1132 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1150 | skip_emulated_instruction(&svm->vcpu); | 1133 | skip_emulated_instruction(&svm->vcpu); |
| 1151 | kvm_emulate_hypercall(&svm->vcpu); | 1134 | kvm_emulate_hypercall(&svm->vcpu); |
| 1152 | return 1; | 1135 | return 1; |
| @@ -1178,11 +1161,18 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
| 1178 | 1161 | ||
| 1179 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1162 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1180 | { | 1163 | { |
| 1181 | svm->next_rip = svm->vmcb->save.rip + 2; | 1164 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 1182 | kvm_emulate_cpuid(&svm->vcpu); | 1165 | kvm_emulate_cpuid(&svm->vcpu); |
| 1183 | return 1; | 1166 | return 1; |
| 1184 | } | 1167 | } |
| 1185 | 1168 | ||
| 1169 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
| 1170 | { | ||
| 1171 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | ||
| 1172 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
| 1173 | return 1; | ||
| 1174 | } | ||
| 1175 | |||
| 1186 | static int emulate_on_interception(struct vcpu_svm *svm, | 1176 | static int emulate_on_interception(struct vcpu_svm *svm, |
| 1187 | struct kvm_run *kvm_run) | 1177 | struct kvm_run *kvm_run) |
| 1188 | { | 1178 | { |
| @@ -1273,9 +1263,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1273 | KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, | 1263 | KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, |
| 1274 | (u32)(data >> 32), handler); | 1264 | (u32)(data >> 32), handler); |
| 1275 | 1265 | ||
| 1276 | svm->vmcb->save.rax = data & 0xffffffff; | 1266 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; |
| 1277 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; | 1267 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; |
| 1278 | svm->next_rip = svm->vmcb->save.rip + 2; | 1268 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 1279 | skip_emulated_instruction(&svm->vcpu); | 1269 | skip_emulated_instruction(&svm->vcpu); |
| 1280 | } | 1270 | } |
| 1281 | return 1; | 1271 | return 1; |
| @@ -1359,13 +1349,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
| 1359 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1349 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1360 | { | 1350 | { |
| 1361 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1351 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
| 1362 | u64 data = (svm->vmcb->save.rax & -1u) | 1352 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
| 1363 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 1353 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
| 1364 | 1354 | ||
| 1365 | KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), | 1355 | KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), |
| 1366 | handler); | 1356 | handler); |
| 1367 | 1357 | ||
| 1368 | svm->next_rip = svm->vmcb->save.rip + 2; | 1358 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 1369 | if (svm_set_msr(&svm->vcpu, ecx, data)) | 1359 | if (svm_set_msr(&svm->vcpu, ecx, data)) |
| 1370 | kvm_inject_gp(&svm->vcpu, 0); | 1360 | kvm_inject_gp(&svm->vcpu, 0); |
| 1371 | else | 1361 | else |
| @@ -1436,7 +1426,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
| 1436 | [SVM_EXIT_CPUID] = cpuid_interception, | 1426 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 1437 | [SVM_EXIT_INVD] = emulate_on_interception, | 1427 | [SVM_EXIT_INVD] = emulate_on_interception, |
| 1438 | [SVM_EXIT_HLT] = halt_interception, | 1428 | [SVM_EXIT_HLT] = halt_interception, |
| 1439 | [SVM_EXIT_INVLPG] = emulate_on_interception, | 1429 | [SVM_EXIT_INVLPG] = invlpg_interception, |
| 1440 | [SVM_EXIT_INVLPGA] = invalid_op_interception, | 1430 | [SVM_EXIT_INVLPGA] = invalid_op_interception, |
| 1441 | [SVM_EXIT_IOIO] = io_interception, | 1431 | [SVM_EXIT_IOIO] = io_interception, |
| 1442 | [SVM_EXIT_MSR] = msr_interception, | 1432 | [SVM_EXIT_MSR] = msr_interception, |
| @@ -1538,6 +1528,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
| 1538 | 1528 | ||
| 1539 | KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); | 1529 | KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); |
| 1540 | 1530 | ||
| 1531 | ++svm->vcpu.stat.irq_injections; | ||
| 1541 | control = &svm->vmcb->control; | 1532 | control = &svm->vmcb->control; |
| 1542 | control->int_vector = irq; | 1533 | control->int_vector = irq; |
| 1543 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 1534 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
| @@ -1716,6 +1707,12 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
| 1716 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 1707 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
| 1717 | } | 1708 | } |
| 1718 | 1709 | ||
| 1710 | #ifdef CONFIG_X86_64 | ||
| 1711 | #define R "r" | ||
| 1712 | #else | ||
| 1713 | #define R "e" | ||
| 1714 | #endif | ||
| 1715 | |||
| 1719 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1716 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1720 | { | 1717 | { |
| 1721 | struct vcpu_svm *svm = to_svm(vcpu); | 1718 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -1723,6 +1720,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1723 | u16 gs_selector; | 1720 | u16 gs_selector; |
| 1724 | u16 ldt_selector; | 1721 | u16 ldt_selector; |
| 1725 | 1722 | ||
| 1723 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 1724 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 1725 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
| 1726 | |||
| 1726 | pre_svm_run(svm); | 1727 | pre_svm_run(svm); |
| 1727 | 1728 | ||
| 1728 | sync_lapic_to_cr8(vcpu); | 1729 | sync_lapic_to_cr8(vcpu); |
| @@ -1750,19 +1751,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1750 | local_irq_enable(); | 1751 | local_irq_enable(); |
| 1751 | 1752 | ||
| 1752 | asm volatile ( | 1753 | asm volatile ( |
| 1754 | "push %%"R"bp; \n\t" | ||
| 1755 | "mov %c[rbx](%[svm]), %%"R"bx \n\t" | ||
| 1756 | "mov %c[rcx](%[svm]), %%"R"cx \n\t" | ||
| 1757 | "mov %c[rdx](%[svm]), %%"R"dx \n\t" | ||
| 1758 | "mov %c[rsi](%[svm]), %%"R"si \n\t" | ||
| 1759 | "mov %c[rdi](%[svm]), %%"R"di \n\t" | ||
| 1760 | "mov %c[rbp](%[svm]), %%"R"bp \n\t" | ||
| 1753 | #ifdef CONFIG_X86_64 | 1761 | #ifdef CONFIG_X86_64 |
| 1754 | "push %%rbp; \n\t" | ||
| 1755 | #else | ||
| 1756 | "push %%ebp; \n\t" | ||
| 1757 | #endif | ||
| 1758 | |||
| 1759 | #ifdef CONFIG_X86_64 | ||
| 1760 | "mov %c[rbx](%[svm]), %%rbx \n\t" | ||
| 1761 | "mov %c[rcx](%[svm]), %%rcx \n\t" | ||
| 1762 | "mov %c[rdx](%[svm]), %%rdx \n\t" | ||
| 1763 | "mov %c[rsi](%[svm]), %%rsi \n\t" | ||
| 1764 | "mov %c[rdi](%[svm]), %%rdi \n\t" | ||
| 1765 | "mov %c[rbp](%[svm]), %%rbp \n\t" | ||
| 1766 | "mov %c[r8](%[svm]), %%r8 \n\t" | 1762 | "mov %c[r8](%[svm]), %%r8 \n\t" |
| 1767 | "mov %c[r9](%[svm]), %%r9 \n\t" | 1763 | "mov %c[r9](%[svm]), %%r9 \n\t" |
| 1768 | "mov %c[r10](%[svm]), %%r10 \n\t" | 1764 | "mov %c[r10](%[svm]), %%r10 \n\t" |
| @@ -1771,41 +1767,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1771 | "mov %c[r13](%[svm]), %%r13 \n\t" | 1767 | "mov %c[r13](%[svm]), %%r13 \n\t" |
| 1772 | "mov %c[r14](%[svm]), %%r14 \n\t" | 1768 | "mov %c[r14](%[svm]), %%r14 \n\t" |
| 1773 | "mov %c[r15](%[svm]), %%r15 \n\t" | 1769 | "mov %c[r15](%[svm]), %%r15 \n\t" |
| 1774 | #else | ||
| 1775 | "mov %c[rbx](%[svm]), %%ebx \n\t" | ||
| 1776 | "mov %c[rcx](%[svm]), %%ecx \n\t" | ||
| 1777 | "mov %c[rdx](%[svm]), %%edx \n\t" | ||
| 1778 | "mov %c[rsi](%[svm]), %%esi \n\t" | ||
| 1779 | "mov %c[rdi](%[svm]), %%edi \n\t" | ||
| 1780 | "mov %c[rbp](%[svm]), %%ebp \n\t" | ||
| 1781 | #endif | 1770 | #endif |
| 1782 | 1771 | ||
| 1783 | #ifdef CONFIG_X86_64 | ||
| 1784 | /* Enter guest mode */ | ||
| 1785 | "push %%rax \n\t" | ||
| 1786 | "mov %c[vmcb](%[svm]), %%rax \n\t" | ||
| 1787 | __ex(SVM_VMLOAD) "\n\t" | ||
| 1788 | __ex(SVM_VMRUN) "\n\t" | ||
| 1789 | __ex(SVM_VMSAVE) "\n\t" | ||
| 1790 | "pop %%rax \n\t" | ||
| 1791 | #else | ||
| 1792 | /* Enter guest mode */ | 1772 | /* Enter guest mode */ |
| 1793 | "push %%eax \n\t" | 1773 | "push %%"R"ax \n\t" |
| 1794 | "mov %c[vmcb](%[svm]), %%eax \n\t" | 1774 | "mov %c[vmcb](%[svm]), %%"R"ax \n\t" |
| 1795 | __ex(SVM_VMLOAD) "\n\t" | 1775 | __ex(SVM_VMLOAD) "\n\t" |
| 1796 | __ex(SVM_VMRUN) "\n\t" | 1776 | __ex(SVM_VMRUN) "\n\t" |
| 1797 | __ex(SVM_VMSAVE) "\n\t" | 1777 | __ex(SVM_VMSAVE) "\n\t" |
| 1798 | "pop %%eax \n\t" | 1778 | "pop %%"R"ax \n\t" |
| 1799 | #endif | ||
| 1800 | 1779 | ||
| 1801 | /* Save guest registers, load host registers */ | 1780 | /* Save guest registers, load host registers */ |
| 1781 | "mov %%"R"bx, %c[rbx](%[svm]) \n\t" | ||
| 1782 | "mov %%"R"cx, %c[rcx](%[svm]) \n\t" | ||
| 1783 | "mov %%"R"dx, %c[rdx](%[svm]) \n\t" | ||
| 1784 | "mov %%"R"si, %c[rsi](%[svm]) \n\t" | ||
| 1785 | "mov %%"R"di, %c[rdi](%[svm]) \n\t" | ||
| 1786 | "mov %%"R"bp, %c[rbp](%[svm]) \n\t" | ||
| 1802 | #ifdef CONFIG_X86_64 | 1787 | #ifdef CONFIG_X86_64 |
| 1803 | "mov %%rbx, %c[rbx](%[svm]) \n\t" | ||
| 1804 | "mov %%rcx, %c[rcx](%[svm]) \n\t" | ||
| 1805 | "mov %%rdx, %c[rdx](%[svm]) \n\t" | ||
| 1806 | "mov %%rsi, %c[rsi](%[svm]) \n\t" | ||
| 1807 | "mov %%rdi, %c[rdi](%[svm]) \n\t" | ||
| 1808 | "mov %%rbp, %c[rbp](%[svm]) \n\t" | ||
| 1809 | "mov %%r8, %c[r8](%[svm]) \n\t" | 1788 | "mov %%r8, %c[r8](%[svm]) \n\t" |
| 1810 | "mov %%r9, %c[r9](%[svm]) \n\t" | 1789 | "mov %%r9, %c[r9](%[svm]) \n\t" |
| 1811 | "mov %%r10, %c[r10](%[svm]) \n\t" | 1790 | "mov %%r10, %c[r10](%[svm]) \n\t" |
| @@ -1814,18 +1793,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1814 | "mov %%r13, %c[r13](%[svm]) \n\t" | 1793 | "mov %%r13, %c[r13](%[svm]) \n\t" |
| 1815 | "mov %%r14, %c[r14](%[svm]) \n\t" | 1794 | "mov %%r14, %c[r14](%[svm]) \n\t" |
| 1816 | "mov %%r15, %c[r15](%[svm]) \n\t" | 1795 | "mov %%r15, %c[r15](%[svm]) \n\t" |
| 1817 | |||
| 1818 | "pop %%rbp; \n\t" | ||
| 1819 | #else | ||
| 1820 | "mov %%ebx, %c[rbx](%[svm]) \n\t" | ||
| 1821 | "mov %%ecx, %c[rcx](%[svm]) \n\t" | ||
| 1822 | "mov %%edx, %c[rdx](%[svm]) \n\t" | ||
| 1823 | "mov %%esi, %c[rsi](%[svm]) \n\t" | ||
| 1824 | "mov %%edi, %c[rdi](%[svm]) \n\t" | ||
| 1825 | "mov %%ebp, %c[rbp](%[svm]) \n\t" | ||
| 1826 | |||
| 1827 | "pop %%ebp; \n\t" | ||
| 1828 | #endif | 1796 | #endif |
| 1797 | "pop %%"R"bp" | ||
| 1829 | : | 1798 | : |
| 1830 | : [svm]"a"(svm), | 1799 | : [svm]"a"(svm), |
| 1831 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), | 1800 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), |
| @@ -1846,11 +1815,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1846 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) | 1815 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) |
| 1847 | #endif | 1816 | #endif |
| 1848 | : "cc", "memory" | 1817 | : "cc", "memory" |
| 1818 | , R"bx", R"cx", R"dx", R"si", R"di" | ||
| 1849 | #ifdef CONFIG_X86_64 | 1819 | #ifdef CONFIG_X86_64 |
| 1850 | , "rbx", "rcx", "rdx", "rsi", "rdi" | ||
| 1851 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" | 1820 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" |
| 1852 | #else | ||
| 1853 | , "ebx", "ecx", "edx" , "esi", "edi" | ||
| 1854 | #endif | 1821 | #endif |
| 1855 | ); | 1822 | ); |
| 1856 | 1823 | ||
| @@ -1858,6 +1825,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1858 | load_db_regs(svm->host_db_regs); | 1825 | load_db_regs(svm->host_db_regs); |
| 1859 | 1826 | ||
| 1860 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | 1827 | vcpu->arch.cr2 = svm->vmcb->save.cr2; |
| 1828 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
| 1829 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
| 1830 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | ||
| 1861 | 1831 | ||
| 1862 | write_dr6(svm->host_dr6); | 1832 | write_dr6(svm->host_dr6); |
| 1863 | write_dr7(svm->host_dr7); | 1833 | write_dr7(svm->host_dr7); |
| @@ -1879,6 +1849,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1879 | svm->next_rip = 0; | 1849 | svm->next_rip = 0; |
| 1880 | } | 1850 | } |
| 1881 | 1851 | ||
| 1852 | #undef R | ||
| 1853 | |||
| 1882 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 1854 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
| 1883 | { | 1855 | { |
| 1884 | struct vcpu_svm *svm = to_svm(vcpu); | 1856 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -1977,8 +1949,6 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 1977 | .set_gdt = svm_set_gdt, | 1949 | .set_gdt = svm_set_gdt, |
| 1978 | .get_dr = svm_get_dr, | 1950 | .get_dr = svm_get_dr, |
| 1979 | .set_dr = svm_set_dr, | 1951 | .set_dr = svm_set_dr, |
| 1980 | .cache_regs = svm_cache_regs, | ||
| 1981 | .decache_regs = svm_decache_regs, | ||
| 1982 | .get_rflags = svm_get_rflags, | 1952 | .get_rflags = svm_get_rflags, |
| 1983 | .set_rflags = svm_set_rflags, | 1953 | .set_rflags = svm_set_rflags, |
| 1984 | 1954 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7041cc52b562..2643b430d83a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -26,6 +26,8 @@ | |||
| 26 | #include <linux/highmem.h> | 26 | #include <linux/highmem.h> |
| 27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 28 | #include <linux/moduleparam.h> | 28 | #include <linux/moduleparam.h> |
| 29 | #include "kvm_cache_regs.h" | ||
| 30 | #include "x86.h" | ||
| 29 | 31 | ||
| 30 | #include <asm/io.h> | 32 | #include <asm/io.h> |
| 31 | #include <asm/desc.h> | 33 | #include <asm/desc.h> |
| @@ -47,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0); | |||
| 47 | static int enable_ept = 1; | 49 | static int enable_ept = 1; |
| 48 | module_param(enable_ept, bool, 0); | 50 | module_param(enable_ept, bool, 0); |
| 49 | 51 | ||
| 52 | static int emulate_invalid_guest_state = 0; | ||
| 53 | module_param(emulate_invalid_guest_state, bool, 0); | ||
| 54 | |||
| 50 | struct vmcs { | 55 | struct vmcs { |
| 51 | u32 revision_id; | 56 | u32 revision_id; |
| 52 | u32 abort; | 57 | u32 abort; |
| @@ -56,6 +61,7 @@ struct vmcs { | |||
| 56 | struct vcpu_vmx { | 61 | struct vcpu_vmx { |
| 57 | struct kvm_vcpu vcpu; | 62 | struct kvm_vcpu vcpu; |
| 58 | struct list_head local_vcpus_link; | 63 | struct list_head local_vcpus_link; |
| 64 | unsigned long host_rsp; | ||
| 59 | int launched; | 65 | int launched; |
| 60 | u8 fail; | 66 | u8 fail; |
| 61 | u32 idt_vectoring_info; | 67 | u32 idt_vectoring_info; |
| @@ -83,6 +89,7 @@ struct vcpu_vmx { | |||
| 83 | } irq; | 89 | } irq; |
| 84 | } rmode; | 90 | } rmode; |
| 85 | int vpid; | 91 | int vpid; |
| 92 | bool emulation_required; | ||
| 86 | }; | 93 | }; |
| 87 | 94 | ||
| 88 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 95 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
| @@ -468,7 +475,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 468 | if (!vcpu->fpu_active) | 475 | if (!vcpu->fpu_active) |
| 469 | eb |= 1u << NM_VECTOR; | 476 | eb |= 1u << NM_VECTOR; |
| 470 | if (vcpu->guest_debug.enabled) | 477 | if (vcpu->guest_debug.enabled) |
| 471 | eb |= 1u << 1; | 478 | eb |= 1u << DB_VECTOR; |
| 472 | if (vcpu->arch.rmode.active) | 479 | if (vcpu->arch.rmode.active) |
| 473 | eb = ~0; | 480 | eb = ~0; |
| 474 | if (vm_need_ept()) | 481 | if (vm_need_ept()) |
| @@ -715,9 +722,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 715 | unsigned long rip; | 722 | unsigned long rip; |
| 716 | u32 interruptibility; | 723 | u32 interruptibility; |
| 717 | 724 | ||
| 718 | rip = vmcs_readl(GUEST_RIP); | 725 | rip = kvm_rip_read(vcpu); |
| 719 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 726 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 720 | vmcs_writel(GUEST_RIP, rip); | 727 | kvm_rip_write(vcpu, rip); |
| 721 | 728 | ||
| 722 | /* | 729 | /* |
| 723 | * We emulated an instruction, so temporary interrupt blocking | 730 | * We emulated an instruction, so temporary interrupt blocking |
| @@ -733,19 +740,35 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 733 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 740 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
| 734 | bool has_error_code, u32 error_code) | 741 | bool has_error_code, u32 error_code) |
| 735 | { | 742 | { |
| 743 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 744 | |||
| 745 | if (has_error_code) | ||
| 746 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | ||
| 747 | |||
| 748 | if (vcpu->arch.rmode.active) { | ||
| 749 | vmx->rmode.irq.pending = true; | ||
| 750 | vmx->rmode.irq.vector = nr; | ||
| 751 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | ||
| 752 | if (nr == BP_VECTOR) | ||
| 753 | vmx->rmode.irq.rip++; | ||
| 754 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
| 755 | nr | INTR_TYPE_SOFT_INTR | ||
| 756 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | ||
| 757 | | INTR_INFO_VALID_MASK); | ||
| 758 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | ||
| 759 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); | ||
| 760 | return; | ||
| 761 | } | ||
| 762 | |||
| 736 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 763 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 737 | nr | INTR_TYPE_EXCEPTION | 764 | nr | INTR_TYPE_EXCEPTION |
| 738 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | 765 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) |
| 739 | | INTR_INFO_VALID_MASK); | 766 | | INTR_INFO_VALID_MASK); |
| 740 | if (has_error_code) | ||
| 741 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | ||
| 742 | } | 767 | } |
| 743 | 768 | ||
| 744 | static bool vmx_exception_injected(struct kvm_vcpu *vcpu) | 769 | static bool vmx_exception_injected(struct kvm_vcpu *vcpu) |
| 745 | { | 770 | { |
| 746 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 771 | return false; |
| 747 | |||
| 748 | return !(vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); | ||
| 749 | } | 772 | } |
| 750 | 773 | ||
| 751 | /* | 774 | /* |
| @@ -947,24 +970,19 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 947 | return ret; | 970 | return ret; |
| 948 | } | 971 | } |
| 949 | 972 | ||
| 950 | /* | 973 | static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) |
| 951 | * Sync the rsp and rip registers into the vcpu structure. This allows | ||
| 952 | * registers to be accessed by indexing vcpu->arch.regs. | ||
| 953 | */ | ||
| 954 | static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu) | ||
| 955 | { | ||
| 956 | vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); | ||
| 957 | vcpu->arch.rip = vmcs_readl(GUEST_RIP); | ||
| 958 | } | ||
| 959 | |||
| 960 | /* | ||
| 961 | * Syncs rsp and rip back into the vmcs. Should be called after possible | ||
| 962 | * modification. | ||
| 963 | */ | ||
| 964 | static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) | ||
| 965 | { | 974 | { |
| 966 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 975 | __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); |
| 967 | vmcs_writel(GUEST_RIP, vcpu->arch.rip); | 976 | switch (reg) { |
| 977 | case VCPU_REGS_RSP: | ||
| 978 | vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); | ||
| 979 | break; | ||
| 980 | case VCPU_REGS_RIP: | ||
| 981 | vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); | ||
| 982 | break; | ||
| 983 | default: | ||
| 984 | break; | ||
| 985 | } | ||
| 968 | } | 986 | } |
| 969 | 987 | ||
| 970 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | 988 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) |
| @@ -1007,17 +1025,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
| 1007 | 1025 | ||
| 1008 | static int vmx_get_irq(struct kvm_vcpu *vcpu) | 1026 | static int vmx_get_irq(struct kvm_vcpu *vcpu) |
| 1009 | { | 1027 | { |
| 1010 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1028 | if (!vcpu->arch.interrupt.pending) |
| 1011 | u32 idtv_info_field; | 1029 | return -1; |
| 1012 | 1030 | return vcpu->arch.interrupt.nr; | |
| 1013 | idtv_info_field = vmx->idt_vectoring_info; | ||
| 1014 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | ||
| 1015 | if (is_external_interrupt(idtv_info_field)) | ||
| 1016 | return idtv_info_field & VECTORING_INFO_VECTOR_MASK; | ||
| 1017 | else | ||
| 1018 | printk(KERN_DEBUG "pending exception: not handled yet\n"); | ||
| 1019 | } | ||
| 1020 | return -1; | ||
| 1021 | } | 1031 | } |
| 1022 | 1032 | ||
| 1023 | static __init int cpu_has_kvm_support(void) | 1033 | static __init int cpu_has_kvm_support(void) |
| @@ -1031,9 +1041,9 @@ static __init int vmx_disabled_by_bios(void) | |||
| 1031 | u64 msr; | 1041 | u64 msr; |
| 1032 | 1042 | ||
| 1033 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1043 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
| 1034 | return (msr & (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1044 | return (msr & (FEATURE_CONTROL_LOCKED | |
| 1035 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1045 | FEATURE_CONTROL_VMXON_ENABLED)) |
| 1036 | == MSR_IA32_FEATURE_CONTROL_LOCKED; | 1046 | == FEATURE_CONTROL_LOCKED; |
| 1037 | /* locked but not enabled */ | 1047 | /* locked but not enabled */ |
| 1038 | } | 1048 | } |
| 1039 | 1049 | ||
| @@ -1045,14 +1055,14 @@ static void hardware_enable(void *garbage) | |||
| 1045 | 1055 | ||
| 1046 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1056 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
| 1047 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1057 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
| 1048 | if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1058 | if ((old & (FEATURE_CONTROL_LOCKED | |
| 1049 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1059 | FEATURE_CONTROL_VMXON_ENABLED)) |
| 1050 | != (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1060 | != (FEATURE_CONTROL_LOCKED | |
| 1051 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1061 | FEATURE_CONTROL_VMXON_ENABLED)) |
| 1052 | /* enable and lock */ | 1062 | /* enable and lock */ |
| 1053 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1063 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | |
| 1054 | MSR_IA32_FEATURE_CONTROL_LOCKED | | 1064 | FEATURE_CONTROL_LOCKED | |
| 1055 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); | 1065 | FEATURE_CONTROL_VMXON_ENABLED); |
| 1056 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1066 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
| 1057 | asm volatile (ASM_VMX_VMXON_RAX | 1067 | asm volatile (ASM_VMX_VMXON_RAX |
| 1058 | : : "a"(&phys_addr), "m"(phys_addr) | 1068 | : : "a"(&phys_addr), "m"(phys_addr) |
| @@ -1120,7 +1130,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 1120 | CPU_BASED_CR3_STORE_EXITING | | 1130 | CPU_BASED_CR3_STORE_EXITING | |
| 1121 | CPU_BASED_USE_IO_BITMAPS | | 1131 | CPU_BASED_USE_IO_BITMAPS | |
| 1122 | CPU_BASED_MOV_DR_EXITING | | 1132 | CPU_BASED_MOV_DR_EXITING | |
| 1123 | CPU_BASED_USE_TSC_OFFSETING; | 1133 | CPU_BASED_USE_TSC_OFFSETING | |
| 1134 | CPU_BASED_INVLPG_EXITING; | ||
| 1124 | opt = CPU_BASED_TPR_SHADOW | | 1135 | opt = CPU_BASED_TPR_SHADOW | |
| 1125 | CPU_BASED_USE_MSR_BITMAPS | | 1136 | CPU_BASED_USE_MSR_BITMAPS | |
| 1126 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1137 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| @@ -1149,9 +1160,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 1149 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 1160 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
| 1150 | #endif | 1161 | #endif |
| 1151 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | 1162 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { |
| 1152 | /* CR3 accesses don't need to cause VM Exits when EPT enabled */ | 1163 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT |
| 1164 | enabled */ | ||
| 1153 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | | 1165 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | |
| 1154 | CPU_BASED_CR3_STORE_EXITING); | 1166 | CPU_BASED_CR3_STORE_EXITING | |
| 1167 | CPU_BASED_INVLPG_EXITING); | ||
| 1155 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | 1168 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, |
| 1156 | &_cpu_based_exec_control) < 0) | 1169 | &_cpu_based_exec_control) < 0) |
| 1157 | return -EIO; | 1170 | return -EIO; |
| @@ -1288,7 +1301,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) | |||
| 1288 | static void enter_pmode(struct kvm_vcpu *vcpu) | 1301 | static void enter_pmode(struct kvm_vcpu *vcpu) |
| 1289 | { | 1302 | { |
| 1290 | unsigned long flags; | 1303 | unsigned long flags; |
| 1304 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 1291 | 1305 | ||
| 1306 | vmx->emulation_required = 1; | ||
| 1292 | vcpu->arch.rmode.active = 0; | 1307 | vcpu->arch.rmode.active = 0; |
| 1293 | 1308 | ||
| 1294 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); | 1309 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); |
| @@ -1305,6 +1320,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
| 1305 | 1320 | ||
| 1306 | update_exception_bitmap(vcpu); | 1321 | update_exception_bitmap(vcpu); |
| 1307 | 1322 | ||
| 1323 | if (emulate_invalid_guest_state) | ||
| 1324 | return; | ||
| 1325 | |||
| 1308 | fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); | 1326 | fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); |
| 1309 | fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); | 1327 | fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); |
| 1310 | fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); | 1328 | fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); |
| @@ -1345,7 +1363,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) | |||
| 1345 | static void enter_rmode(struct kvm_vcpu *vcpu) | 1363 | static void enter_rmode(struct kvm_vcpu *vcpu) |
| 1346 | { | 1364 | { |
| 1347 | unsigned long flags; | 1365 | unsigned long flags; |
| 1366 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 1348 | 1367 | ||
| 1368 | vmx->emulation_required = 1; | ||
| 1349 | vcpu->arch.rmode.active = 1; | 1369 | vcpu->arch.rmode.active = 1; |
| 1350 | 1370 | ||
| 1351 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1371 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
| @@ -1367,6 +1387,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 1367 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); | 1387 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); |
| 1368 | update_exception_bitmap(vcpu); | 1388 | update_exception_bitmap(vcpu); |
| 1369 | 1389 | ||
| 1390 | if (emulate_invalid_guest_state) | ||
| 1391 | goto continue_rmode; | ||
| 1392 | |||
| 1370 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); | 1393 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); |
| 1371 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | 1394 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); |
| 1372 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | 1395 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); |
| @@ -1382,6 +1405,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 1382 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); | 1405 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); |
| 1383 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); | 1406 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); |
| 1384 | 1407 | ||
| 1408 | continue_rmode: | ||
| 1385 | kvm_mmu_reset_context(vcpu); | 1409 | kvm_mmu_reset_context(vcpu); |
| 1386 | init_rmode(vcpu->kvm); | 1410 | init_rmode(vcpu->kvm); |
| 1387 | } | 1411 | } |
| @@ -1715,6 +1739,186 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | |||
| 1715 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 1739 | vmcs_writel(GUEST_GDTR_BASE, dt->base); |
| 1716 | } | 1740 | } |
| 1717 | 1741 | ||
| 1742 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | ||
| 1743 | { | ||
| 1744 | struct kvm_segment var; | ||
| 1745 | u32 ar; | ||
| 1746 | |||
| 1747 | vmx_get_segment(vcpu, &var, seg); | ||
| 1748 | ar = vmx_segment_access_rights(&var); | ||
| 1749 | |||
| 1750 | if (var.base != (var.selector << 4)) | ||
| 1751 | return false; | ||
| 1752 | if (var.limit != 0xffff) | ||
| 1753 | return false; | ||
| 1754 | if (ar != 0xf3) | ||
| 1755 | return false; | ||
| 1756 | |||
| 1757 | return true; | ||
| 1758 | } | ||
| 1759 | |||
| 1760 | static bool code_segment_valid(struct kvm_vcpu *vcpu) | ||
| 1761 | { | ||
| 1762 | struct kvm_segment cs; | ||
| 1763 | unsigned int cs_rpl; | ||
| 1764 | |||
| 1765 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
| 1766 | cs_rpl = cs.selector & SELECTOR_RPL_MASK; | ||
| 1767 | |||
| 1768 | if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) | ||
| 1769 | return false; | ||
| 1770 | if (!cs.s) | ||
| 1771 | return false; | ||
| 1772 | if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { | ||
| 1773 | if (cs.dpl > cs_rpl) | ||
| 1774 | return false; | ||
| 1775 | } else if (cs.type & AR_TYPE_CODE_MASK) { | ||
| 1776 | if (cs.dpl != cs_rpl) | ||
| 1777 | return false; | ||
| 1778 | } | ||
| 1779 | if (!cs.present) | ||
| 1780 | return false; | ||
| 1781 | |||
| 1782 | /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ | ||
| 1783 | return true; | ||
| 1784 | } | ||
| 1785 | |||
| 1786 | static bool stack_segment_valid(struct kvm_vcpu *vcpu) | ||
| 1787 | { | ||
| 1788 | struct kvm_segment ss; | ||
| 1789 | unsigned int ss_rpl; | ||
| 1790 | |||
| 1791 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); | ||
| 1792 | ss_rpl = ss.selector & SELECTOR_RPL_MASK; | ||
| 1793 | |||
| 1794 | if ((ss.type != 3) || (ss.type != 7)) | ||
| 1795 | return false; | ||
| 1796 | if (!ss.s) | ||
| 1797 | return false; | ||
| 1798 | if (ss.dpl != ss_rpl) /* DPL != RPL */ | ||
| 1799 | return false; | ||
| 1800 | if (!ss.present) | ||
| 1801 | return false; | ||
| 1802 | |||
| 1803 | return true; | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) | ||
| 1807 | { | ||
| 1808 | struct kvm_segment var; | ||
| 1809 | unsigned int rpl; | ||
| 1810 | |||
| 1811 | vmx_get_segment(vcpu, &var, seg); | ||
| 1812 | rpl = var.selector & SELECTOR_RPL_MASK; | ||
| 1813 | |||
| 1814 | if (!var.s) | ||
| 1815 | return false; | ||
| 1816 | if (!var.present) | ||
| 1817 | return false; | ||
| 1818 | if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) { | ||
| 1819 | if (var.dpl < rpl) /* DPL < RPL */ | ||
| 1820 | return false; | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | /* TODO: Add other members to kvm_segment_field to allow checking for other access | ||
| 1824 | * rights flags | ||
| 1825 | */ | ||
| 1826 | return true; | ||
| 1827 | } | ||
| 1828 | |||
| 1829 | static bool tr_valid(struct kvm_vcpu *vcpu) | ||
| 1830 | { | ||
| 1831 | struct kvm_segment tr; | ||
| 1832 | |||
| 1833 | vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); | ||
| 1834 | |||
| 1835 | if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ | ||
| 1836 | return false; | ||
| 1837 | if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ | ||
| 1838 | return false; | ||
| 1839 | if (!tr.present) | ||
| 1840 | return false; | ||
| 1841 | |||
| 1842 | return true; | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | static bool ldtr_valid(struct kvm_vcpu *vcpu) | ||
| 1846 | { | ||
| 1847 | struct kvm_segment ldtr; | ||
| 1848 | |||
| 1849 | vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); | ||
| 1850 | |||
| 1851 | if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ | ||
| 1852 | return false; | ||
| 1853 | if (ldtr.type != 2) | ||
| 1854 | return false; | ||
| 1855 | if (!ldtr.present) | ||
| 1856 | return false; | ||
| 1857 | |||
| 1858 | return true; | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | ||
| 1862 | { | ||
| 1863 | struct kvm_segment cs, ss; | ||
| 1864 | |||
| 1865 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
| 1866 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); | ||
| 1867 | |||
| 1868 | return ((cs.selector & SELECTOR_RPL_MASK) == | ||
| 1869 | (ss.selector & SELECTOR_RPL_MASK)); | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | /* | ||
| 1873 | * Check if guest state is valid. Returns true if valid, false if | ||
| 1874 | * not. | ||
| 1875 | * We assume that registers are always usable | ||
| 1876 | */ | ||
| 1877 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | ||
| 1878 | { | ||
| 1879 | /* real mode guest state checks */ | ||
| 1880 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) { | ||
| 1881 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | ||
| 1882 | return false; | ||
| 1883 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | ||
| 1884 | return false; | ||
| 1885 | if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) | ||
| 1886 | return false; | ||
| 1887 | if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) | ||
| 1888 | return false; | ||
| 1889 | if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) | ||
| 1890 | return false; | ||
| 1891 | if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) | ||
| 1892 | return false; | ||
| 1893 | } else { | ||
| 1894 | /* protected mode guest state checks */ | ||
| 1895 | if (!cs_ss_rpl_check(vcpu)) | ||
| 1896 | return false; | ||
| 1897 | if (!code_segment_valid(vcpu)) | ||
| 1898 | return false; | ||
| 1899 | if (!stack_segment_valid(vcpu)) | ||
| 1900 | return false; | ||
| 1901 | if (!data_segment_valid(vcpu, VCPU_SREG_DS)) | ||
| 1902 | return false; | ||
| 1903 | if (!data_segment_valid(vcpu, VCPU_SREG_ES)) | ||
| 1904 | return false; | ||
| 1905 | if (!data_segment_valid(vcpu, VCPU_SREG_FS)) | ||
| 1906 | return false; | ||
| 1907 | if (!data_segment_valid(vcpu, VCPU_SREG_GS)) | ||
| 1908 | return false; | ||
| 1909 | if (!tr_valid(vcpu)) | ||
| 1910 | return false; | ||
| 1911 | if (!ldtr_valid(vcpu)) | ||
| 1912 | return false; | ||
| 1913 | } | ||
| 1914 | /* TODO: | ||
| 1915 | * - Add checks on RIP | ||
| 1916 | * - Add checks on RFLAGS | ||
| 1917 | */ | ||
| 1918 | |||
| 1919 | return true; | ||
| 1920 | } | ||
| 1921 | |||
| 1718 | static int init_rmode_tss(struct kvm *kvm) | 1922 | static int init_rmode_tss(struct kvm *kvm) |
| 1719 | { | 1923 | { |
| 1720 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 1924 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; |
| @@ -1726,7 +1930,8 @@ static int init_rmode_tss(struct kvm *kvm) | |||
| 1726 | if (r < 0) | 1930 | if (r < 0) |
| 1727 | goto out; | 1931 | goto out; |
| 1728 | data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; | 1932 | data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; |
| 1729 | r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); | 1933 | r = kvm_write_guest_page(kvm, fn++, &data, |
| 1934 | TSS_IOPB_BASE_OFFSET, sizeof(u16)); | ||
| 1730 | if (r < 0) | 1935 | if (r < 0) |
| 1731 | goto out; | 1936 | goto out; |
| 1732 | r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); | 1937 | r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); |
| @@ -1789,7 +1994,7 @@ static void seg_setup(int seg) | |||
| 1789 | vmcs_write16(sf->selector, 0); | 1994 | vmcs_write16(sf->selector, 0); |
| 1790 | vmcs_writel(sf->base, 0); | 1995 | vmcs_writel(sf->base, 0); |
| 1791 | vmcs_write32(sf->limit, 0xffff); | 1996 | vmcs_write32(sf->limit, 0xffff); |
| 1792 | vmcs_write32(sf->ar_bytes, 0x93); | 1997 | vmcs_write32(sf->ar_bytes, 0xf3); |
| 1793 | } | 1998 | } |
| 1794 | 1999 | ||
| 1795 | static int alloc_apic_access_page(struct kvm *kvm) | 2000 | static int alloc_apic_access_page(struct kvm *kvm) |
| @@ -1808,9 +2013,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
| 1808 | if (r) | 2013 | if (r) |
| 1809 | goto out; | 2014 | goto out; |
| 1810 | 2015 | ||
| 1811 | down_read(¤t->mm->mmap_sem); | ||
| 1812 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 2016 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
| 1813 | up_read(¤t->mm->mmap_sem); | ||
| 1814 | out: | 2017 | out: |
| 1815 | up_write(&kvm->slots_lock); | 2018 | up_write(&kvm->slots_lock); |
| 1816 | return r; | 2019 | return r; |
| @@ -1832,10 +2035,8 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
| 1832 | if (r) | 2035 | if (r) |
| 1833 | goto out; | 2036 | goto out; |
| 1834 | 2037 | ||
| 1835 | down_read(¤t->mm->mmap_sem); | ||
| 1836 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2038 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
| 1837 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); | 2039 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); |
| 1838 | up_read(¤t->mm->mmap_sem); | ||
| 1839 | out: | 2040 | out: |
| 1840 | up_write(&kvm->slots_lock); | 2041 | up_write(&kvm->slots_lock); |
| 1841 | return r; | 2042 | return r; |
| @@ -1917,7 +2118,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 1917 | } | 2118 | } |
| 1918 | if (!vm_need_ept()) | 2119 | if (!vm_need_ept()) |
| 1919 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | 2120 | exec_control |= CPU_BASED_CR3_STORE_EXITING | |
| 1920 | CPU_BASED_CR3_LOAD_EXITING; | 2121 | CPU_BASED_CR3_LOAD_EXITING | |
| 2122 | CPU_BASED_INVLPG_EXITING; | ||
| 1921 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | 2123 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); |
| 1922 | 2124 | ||
| 1923 | if (cpu_has_secondary_exec_ctrls()) { | 2125 | if (cpu_has_secondary_exec_ctrls()) { |
| @@ -2019,6 +2221,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2019 | u64 msr; | 2221 | u64 msr; |
| 2020 | int ret; | 2222 | int ret; |
| 2021 | 2223 | ||
| 2224 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | ||
| 2022 | down_read(&vcpu->kvm->slots_lock); | 2225 | down_read(&vcpu->kvm->slots_lock); |
| 2023 | if (!init_rmode(vmx->vcpu.kvm)) { | 2226 | if (!init_rmode(vmx->vcpu.kvm)) { |
| 2024 | ret = -ENOMEM; | 2227 | ret = -ENOMEM; |
| @@ -2036,6 +2239,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2036 | 2239 | ||
| 2037 | fx_init(&vmx->vcpu); | 2240 | fx_init(&vmx->vcpu); |
| 2038 | 2241 | ||
| 2242 | seg_setup(VCPU_SREG_CS); | ||
| 2039 | /* | 2243 | /* |
| 2040 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | 2244 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode |
| 2041 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. | 2245 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. |
| @@ -2047,8 +2251,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2047 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | 2251 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); |
| 2048 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | 2252 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); |
| 2049 | } | 2253 | } |
| 2050 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | ||
| 2051 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); | ||
| 2052 | 2254 | ||
| 2053 | seg_setup(VCPU_SREG_DS); | 2255 | seg_setup(VCPU_SREG_DS); |
| 2054 | seg_setup(VCPU_SREG_ES); | 2256 | seg_setup(VCPU_SREG_ES); |
| @@ -2072,10 +2274,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2072 | 2274 | ||
| 2073 | vmcs_writel(GUEST_RFLAGS, 0x02); | 2275 | vmcs_writel(GUEST_RFLAGS, 0x02); |
| 2074 | if (vmx->vcpu.vcpu_id == 0) | 2276 | if (vmx->vcpu.vcpu_id == 0) |
| 2075 | vmcs_writel(GUEST_RIP, 0xfff0); | 2277 | kvm_rip_write(vcpu, 0xfff0); |
| 2076 | else | 2278 | else |
| 2077 | vmcs_writel(GUEST_RIP, 0); | 2279 | kvm_rip_write(vcpu, 0); |
| 2078 | vmcs_writel(GUEST_RSP, 0); | 2280 | kvm_register_write(vcpu, VCPU_REGS_RSP, 0); |
| 2079 | 2281 | ||
| 2080 | /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ | 2282 | /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ |
| 2081 | vmcs_writel(GUEST_DR7, 0x400); | 2283 | vmcs_writel(GUEST_DR7, 0x400); |
| @@ -2125,6 +2327,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2125 | 2327 | ||
| 2126 | ret = 0; | 2328 | ret = 0; |
| 2127 | 2329 | ||
| 2330 | /* HACK: Don't enable emulation on guest boot/reset */ | ||
| 2331 | vmx->emulation_required = 0; | ||
| 2332 | |||
| 2128 | out: | 2333 | out: |
| 2129 | up_read(&vcpu->kvm->slots_lock); | 2334 | up_read(&vcpu->kvm->slots_lock); |
| 2130 | return ret; | 2335 | return ret; |
| @@ -2136,14 +2341,15 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
| 2136 | 2341 | ||
| 2137 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); | 2342 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); |
| 2138 | 2343 | ||
| 2344 | ++vcpu->stat.irq_injections; | ||
| 2139 | if (vcpu->arch.rmode.active) { | 2345 | if (vcpu->arch.rmode.active) { |
| 2140 | vmx->rmode.irq.pending = true; | 2346 | vmx->rmode.irq.pending = true; |
| 2141 | vmx->rmode.irq.vector = irq; | 2347 | vmx->rmode.irq.vector = irq; |
| 2142 | vmx->rmode.irq.rip = vmcs_readl(GUEST_RIP); | 2348 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
| 2143 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2349 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 2144 | irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); | 2350 | irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); |
| 2145 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | 2351 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); |
| 2146 | vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip - 1); | 2352 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); |
| 2147 | return; | 2353 | return; |
| 2148 | } | 2354 | } |
| 2149 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2355 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| @@ -2154,7 +2360,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 2154 | { | 2360 | { |
| 2155 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2361 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 2156 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2362 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
| 2157 | vcpu->arch.nmi_pending = 0; | ||
| 2158 | } | 2363 | } |
| 2159 | 2364 | ||
| 2160 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 2365 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) |
| @@ -2166,7 +2371,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | |||
| 2166 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | 2371 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); |
| 2167 | if (!vcpu->arch.irq_pending[word_index]) | 2372 | if (!vcpu->arch.irq_pending[word_index]) |
| 2168 | clear_bit(word_index, &vcpu->arch.irq_summary); | 2373 | clear_bit(word_index, &vcpu->arch.irq_summary); |
| 2169 | vmx_inject_irq(vcpu, irq); | 2374 | kvm_queue_interrupt(vcpu, irq); |
| 2170 | } | 2375 | } |
| 2171 | 2376 | ||
| 2172 | 2377 | ||
| @@ -2180,13 +2385,12 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, | |||
| 2180 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); | 2385 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); |
| 2181 | 2386 | ||
| 2182 | if (vcpu->arch.interrupt_window_open && | 2387 | if (vcpu->arch.interrupt_window_open && |
| 2183 | vcpu->arch.irq_summary && | 2388 | vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) |
| 2184 | !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) | ||
| 2185 | /* | ||
| 2186 | * If interrupts enabled, and not blocked by sti or mov ss. Good. | ||
| 2187 | */ | ||
| 2188 | kvm_do_inject_irq(vcpu); | 2389 | kvm_do_inject_irq(vcpu); |
| 2189 | 2390 | ||
| 2391 | if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) | ||
| 2392 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
| 2393 | |||
| 2190 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2394 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
| 2191 | if (!vcpu->arch.interrupt_window_open && | 2395 | if (!vcpu->arch.interrupt_window_open && |
| 2192 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) | 2396 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) |
| @@ -2237,9 +2441,6 @@ static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) | |||
| 2237 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, | 2441 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, |
| 2238 | int vec, u32 err_code) | 2442 | int vec, u32 err_code) |
| 2239 | { | 2443 | { |
| 2240 | if (!vcpu->arch.rmode.active) | ||
| 2241 | return 0; | ||
| 2242 | |||
| 2243 | /* | 2444 | /* |
| 2244 | * Instruction with address size override prefix opcode 0x67 | 2445 | * Instruction with address size override prefix opcode 0x67 |
| 2245 | * Cause the #SS fault with 0 error code in VM86 mode. | 2446 | * Cause the #SS fault with 0 error code in VM86 mode. |
| @@ -2247,6 +2448,25 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
| 2247 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2448 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
| 2248 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2449 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) |
| 2249 | return 1; | 2450 | return 1; |
| 2451 | /* | ||
| 2452 | * Forward all other exceptions that are valid in real mode. | ||
| 2453 | * FIXME: Breaks guest debugging in real mode, needs to be fixed with | ||
| 2454 | * the required debugging infrastructure rework. | ||
| 2455 | */ | ||
| 2456 | switch (vec) { | ||
| 2457 | case DE_VECTOR: | ||
| 2458 | case DB_VECTOR: | ||
| 2459 | case BP_VECTOR: | ||
| 2460 | case OF_VECTOR: | ||
| 2461 | case BR_VECTOR: | ||
| 2462 | case UD_VECTOR: | ||
| 2463 | case DF_VECTOR: | ||
| 2464 | case SS_VECTOR: | ||
| 2465 | case GP_VECTOR: | ||
| 2466 | case MF_VECTOR: | ||
| 2467 | kvm_queue_exception(vcpu, vec); | ||
| 2468 | return 1; | ||
| 2469 | } | ||
| 2250 | return 0; | 2470 | return 0; |
| 2251 | } | 2471 | } |
| 2252 | 2472 | ||
| @@ -2288,7 +2508,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2288 | } | 2508 | } |
| 2289 | 2509 | ||
| 2290 | error_code = 0; | 2510 | error_code = 0; |
| 2291 | rip = vmcs_readl(GUEST_RIP); | 2511 | rip = kvm_rip_read(vcpu); |
| 2292 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 2512 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
| 2293 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 2513 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
| 2294 | if (is_page_fault(intr_info)) { | 2514 | if (is_page_fault(intr_info)) { |
| @@ -2298,7 +2518,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2518 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
| 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2519 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
| 2300 | (u32)((u64)cr2 >> 32), handler); | 2520 | (u32)((u64)cr2 >> 32), handler); |
| 2301 | if (vect_info & VECTORING_INFO_VALID_MASK) | 2521 | if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending) |
| 2302 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 2522 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
| 2303 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2523 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
| 2304 | } | 2524 | } |
| @@ -2386,27 +2606,25 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2386 | reg = (exit_qualification >> 8) & 15; | 2606 | reg = (exit_qualification >> 8) & 15; |
| 2387 | switch ((exit_qualification >> 4) & 3) { | 2607 | switch ((exit_qualification >> 4) & 3) { |
| 2388 | case 0: /* mov to cr */ | 2608 | case 0: /* mov to cr */ |
| 2389 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], | 2609 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, |
| 2390 | (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); | 2610 | (u32)kvm_register_read(vcpu, reg), |
| 2611 | (u32)((u64)kvm_register_read(vcpu, reg) >> 32), | ||
| 2612 | handler); | ||
| 2391 | switch (cr) { | 2613 | switch (cr) { |
| 2392 | case 0: | 2614 | case 0: |
| 2393 | vcpu_load_rsp_rip(vcpu); | 2615 | kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg)); |
| 2394 | kvm_set_cr0(vcpu, vcpu->arch.regs[reg]); | ||
| 2395 | skip_emulated_instruction(vcpu); | 2616 | skip_emulated_instruction(vcpu); |
| 2396 | return 1; | 2617 | return 1; |
| 2397 | case 3: | 2618 | case 3: |
| 2398 | vcpu_load_rsp_rip(vcpu); | 2619 | kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg)); |
| 2399 | kvm_set_cr3(vcpu, vcpu->arch.regs[reg]); | ||
| 2400 | skip_emulated_instruction(vcpu); | 2620 | skip_emulated_instruction(vcpu); |
| 2401 | return 1; | 2621 | return 1; |
| 2402 | case 4: | 2622 | case 4: |
| 2403 | vcpu_load_rsp_rip(vcpu); | 2623 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); |
| 2404 | kvm_set_cr4(vcpu, vcpu->arch.regs[reg]); | ||
| 2405 | skip_emulated_instruction(vcpu); | 2624 | skip_emulated_instruction(vcpu); |
| 2406 | return 1; | 2625 | return 1; |
| 2407 | case 8: | 2626 | case 8: |
| 2408 | vcpu_load_rsp_rip(vcpu); | 2627 | kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); |
| 2409 | kvm_set_cr8(vcpu, vcpu->arch.regs[reg]); | ||
| 2410 | skip_emulated_instruction(vcpu); | 2628 | skip_emulated_instruction(vcpu); |
| 2411 | if (irqchip_in_kernel(vcpu->kvm)) | 2629 | if (irqchip_in_kernel(vcpu->kvm)) |
| 2412 | return 1; | 2630 | return 1; |
| @@ -2415,7 +2633,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2415 | }; | 2633 | }; |
| 2416 | break; | 2634 | break; |
| 2417 | case 2: /* clts */ | 2635 | case 2: /* clts */ |
| 2418 | vcpu_load_rsp_rip(vcpu); | ||
| 2419 | vmx_fpu_deactivate(vcpu); | 2636 | vmx_fpu_deactivate(vcpu); |
| 2420 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 2637 | vcpu->arch.cr0 &= ~X86_CR0_TS; |
| 2421 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | 2638 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); |
| @@ -2426,21 +2643,17 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2426 | case 1: /*mov from cr*/ | 2643 | case 1: /*mov from cr*/ |
| 2427 | switch (cr) { | 2644 | switch (cr) { |
| 2428 | case 3: | 2645 | case 3: |
| 2429 | vcpu_load_rsp_rip(vcpu); | 2646 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); |
| 2430 | vcpu->arch.regs[reg] = vcpu->arch.cr3; | ||
| 2431 | vcpu_put_rsp_rip(vcpu); | ||
| 2432 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, | 2647 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, |
| 2433 | (u32)vcpu->arch.regs[reg], | 2648 | (u32)kvm_register_read(vcpu, reg), |
| 2434 | (u32)((u64)vcpu->arch.regs[reg] >> 32), | 2649 | (u32)((u64)kvm_register_read(vcpu, reg) >> 32), |
| 2435 | handler); | 2650 | handler); |
| 2436 | skip_emulated_instruction(vcpu); | 2651 | skip_emulated_instruction(vcpu); |
| 2437 | return 1; | 2652 | return 1; |
| 2438 | case 8: | 2653 | case 8: |
| 2439 | vcpu_load_rsp_rip(vcpu); | 2654 | kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu)); |
| 2440 | vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); | ||
| 2441 | vcpu_put_rsp_rip(vcpu); | ||
| 2442 | KVMTRACE_2D(CR_READ, vcpu, (u32)cr, | 2655 | KVMTRACE_2D(CR_READ, vcpu, (u32)cr, |
| 2443 | (u32)vcpu->arch.regs[reg], handler); | 2656 | (u32)kvm_register_read(vcpu, reg), handler); |
| 2444 | skip_emulated_instruction(vcpu); | 2657 | skip_emulated_instruction(vcpu); |
| 2445 | return 1; | 2658 | return 1; |
| 2446 | } | 2659 | } |
| @@ -2472,7 +2685,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2472 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 2685 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 2473 | dr = exit_qualification & 7; | 2686 | dr = exit_qualification & 7; |
| 2474 | reg = (exit_qualification >> 8) & 15; | 2687 | reg = (exit_qualification >> 8) & 15; |
| 2475 | vcpu_load_rsp_rip(vcpu); | ||
| 2476 | if (exit_qualification & 16) { | 2688 | if (exit_qualification & 16) { |
| 2477 | /* mov from dr */ | 2689 | /* mov from dr */ |
| 2478 | switch (dr) { | 2690 | switch (dr) { |
| @@ -2485,12 +2697,11 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2485 | default: | 2697 | default: |
| 2486 | val = 0; | 2698 | val = 0; |
| 2487 | } | 2699 | } |
| 2488 | vcpu->arch.regs[reg] = val; | 2700 | kvm_register_write(vcpu, reg, val); |
| 2489 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); | 2701 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); |
| 2490 | } else { | 2702 | } else { |
| 2491 | /* mov to dr */ | 2703 | /* mov to dr */ |
| 2492 | } | 2704 | } |
| 2493 | vcpu_put_rsp_rip(vcpu); | ||
| 2494 | skip_emulated_instruction(vcpu); | 2705 | skip_emulated_instruction(vcpu); |
| 2495 | return 1; | 2706 | return 1; |
| 2496 | } | 2707 | } |
| @@ -2583,6 +2794,15 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2583 | return 1; | 2794 | return 1; |
| 2584 | } | 2795 | } |
| 2585 | 2796 | ||
| 2797 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
| 2798 | { | ||
| 2799 | u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | ||
| 2800 | |||
| 2801 | kvm_mmu_invlpg(vcpu, exit_qualification); | ||
| 2802 | skip_emulated_instruction(vcpu); | ||
| 2803 | return 1; | ||
| 2804 | } | ||
| 2805 | |||
| 2586 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2806 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2587 | { | 2807 | { |
| 2588 | skip_emulated_instruction(vcpu); | 2808 | skip_emulated_instruction(vcpu); |
| @@ -2695,6 +2915,43 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2695 | return 1; | 2915 | return 1; |
| 2696 | } | 2916 | } |
| 2697 | 2917 | ||
| 2918 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | ||
| 2919 | struct kvm_run *kvm_run) | ||
| 2920 | { | ||
| 2921 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 2922 | int err; | ||
| 2923 | |||
| 2924 | preempt_enable(); | ||
| 2925 | local_irq_enable(); | ||
| 2926 | |||
| 2927 | while (!guest_state_valid(vcpu)) { | ||
| 2928 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | ||
| 2929 | |||
| 2930 | switch (err) { | ||
| 2931 | case EMULATE_DONE: | ||
| 2932 | break; | ||
| 2933 | case EMULATE_DO_MMIO: | ||
| 2934 | kvm_report_emulation_failure(vcpu, "mmio"); | ||
| 2935 | /* TODO: Handle MMIO */ | ||
| 2936 | return; | ||
| 2937 | default: | ||
| 2938 | kvm_report_emulation_failure(vcpu, "emulation failure"); | ||
| 2939 | return; | ||
| 2940 | } | ||
| 2941 | |||
| 2942 | if (signal_pending(current)) | ||
| 2943 | break; | ||
| 2944 | if (need_resched()) | ||
| 2945 | schedule(); | ||
| 2946 | } | ||
| 2947 | |||
| 2948 | local_irq_disable(); | ||
| 2949 | preempt_disable(); | ||
| 2950 | |||
| 2951 | /* Guest state should be valid now, no more emulation should be needed */ | ||
| 2952 | vmx->emulation_required = 0; | ||
| 2953 | } | ||
| 2954 | |||
| 2698 | /* | 2955 | /* |
| 2699 | * The exit handlers return 1 if the exit was handled fully and guest execution | 2956 | * The exit handlers return 1 if the exit was handled fully and guest execution |
| 2700 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 2957 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
| @@ -2714,6 +2971,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
| 2714 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, | 2971 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, |
| 2715 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, | 2972 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, |
| 2716 | [EXIT_REASON_HLT] = handle_halt, | 2973 | [EXIT_REASON_HLT] = handle_halt, |
| 2974 | [EXIT_REASON_INVLPG] = handle_invlpg, | ||
| 2717 | [EXIT_REASON_VMCALL] = handle_vmcall, | 2975 | [EXIT_REASON_VMCALL] = handle_vmcall, |
| 2718 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 2976 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
| 2719 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 2977 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
| @@ -2735,8 +2993,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 2735 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2993 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2736 | u32 vectoring_info = vmx->idt_vectoring_info; | 2994 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 2737 | 2995 | ||
| 2738 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), | 2996 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), |
| 2739 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); | 2997 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); |
| 2740 | 2998 | ||
| 2741 | /* Access CR3 don't cause VMExit in paging mode, so we need | 2999 | /* Access CR3 don't cause VMExit in paging mode, so we need |
| 2742 | * to sync with guest real CR3. */ | 3000 | * to sync with guest real CR3. */ |
| @@ -2829,88 +3087,92 @@ static void enable_intr_window(struct kvm_vcpu *vcpu) | |||
| 2829 | enable_irq_window(vcpu); | 3087 | enable_irq_window(vcpu); |
| 2830 | } | 3088 | } |
| 2831 | 3089 | ||
| 2832 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | 3090 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
| 2833 | { | 3091 | { |
| 2834 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3092 | u32 exit_intr_info; |
| 2835 | u32 idtv_info_field, intr_info_field, exit_intr_info_field; | 3093 | u32 idt_vectoring_info; |
| 2836 | int vector; | 3094 | bool unblock_nmi; |
| 3095 | u8 vector; | ||
| 3096 | int type; | ||
| 3097 | bool idtv_info_valid; | ||
| 3098 | u32 error; | ||
| 2837 | 3099 | ||
| 2838 | update_tpr_threshold(vcpu); | 3100 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 2839 | 3101 | if (cpu_has_virtual_nmis()) { | |
| 2840 | intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); | 3102 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
| 2841 | exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO); | 3103 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
| 2842 | idtv_info_field = vmx->idt_vectoring_info; | 3104 | /* |
| 2843 | if (intr_info_field & INTR_INFO_VALID_MASK) { | 3105 | * SDM 3: 25.7.1.2 |
| 2844 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | 3106 | * Re-set bit "block by NMI" before VM entry if vmexit caused by |
| 2845 | /* TODO: fault when IDT_Vectoring */ | 3107 | * a guest IRET fault. |
| 2846 | if (printk_ratelimit()) | 3108 | */ |
| 2847 | printk(KERN_ERR "Fault when IDT_Vectoring\n"); | 3109 | if (unblock_nmi && vector != DF_VECTOR) |
| 2848 | } | 3110 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 2849 | enable_intr_window(vcpu); | 3111 | GUEST_INTR_STATE_NMI); |
| 2850 | return; | ||
| 2851 | } | 3112 | } |
| 2852 | if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { | ||
| 2853 | if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) | ||
| 2854 | == INTR_TYPE_EXT_INTR | ||
| 2855 | && vcpu->arch.rmode.active) { | ||
| 2856 | u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; | ||
| 2857 | |||
| 2858 | vmx_inject_irq(vcpu, vect); | ||
| 2859 | enable_intr_window(vcpu); | ||
| 2860 | return; | ||
| 2861 | } | ||
| 2862 | |||
| 2863 | KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); | ||
| 2864 | 3113 | ||
| 3114 | idt_vectoring_info = vmx->idt_vectoring_info; | ||
| 3115 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | ||
| 3116 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | ||
| 3117 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | ||
| 3118 | if (vmx->vcpu.arch.nmi_injected) { | ||
| 2865 | /* | 3119 | /* |
| 2866 | * SDM 3: 25.7.1.2 | 3120 | * SDM 3: 25.7.1.2 |
| 2867 | * Clear bit "block by NMI" before VM entry if a NMI delivery | 3121 | * Clear bit "block by NMI" before VM entry if a NMI delivery |
| 2868 | * faulted. | 3122 | * faulted. |
| 2869 | */ | 3123 | */ |
| 2870 | if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) | 3124 | if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) |
| 2871 | == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis()) | 3125 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 2872 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 3126 | GUEST_INTR_STATE_NMI); |
| 2873 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 3127 | else |
| 2874 | ~GUEST_INTR_STATE_NMI); | 3128 | vmx->vcpu.arch.nmi_injected = false; |
| 2875 | 3129 | } | |
| 2876 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field | 3130 | kvm_clear_exception_queue(&vmx->vcpu); |
| 2877 | & ~INTR_INFO_RESVD_BITS_MASK); | 3131 | if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { |
| 2878 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 3132 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
| 2879 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); | 3133 | error = vmcs_read32(IDT_VECTORING_ERROR_CODE); |
| 2880 | 3134 | kvm_queue_exception_e(&vmx->vcpu, vector, error); | |
| 2881 | if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) | 3135 | } else |
| 2882 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 3136 | kvm_queue_exception(&vmx->vcpu, vector); |
| 2883 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | 3137 | vmx->idt_vectoring_info = 0; |
| 2884 | enable_intr_window(vcpu); | ||
| 2885 | return; | ||
| 2886 | } | 3138 | } |
| 3139 | kvm_clear_interrupt_queue(&vmx->vcpu); | ||
| 3140 | if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { | ||
| 3141 | kvm_queue_interrupt(&vmx->vcpu, vector); | ||
| 3142 | vmx->idt_vectoring_info = 0; | ||
| 3143 | } | ||
| 3144 | } | ||
| 3145 | |||
| 3146 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | ||
| 3147 | { | ||
| 3148 | update_tpr_threshold(vcpu); | ||
| 3149 | |||
| 2887 | if (cpu_has_virtual_nmis()) { | 3150 | if (cpu_has_virtual_nmis()) { |
| 2888 | /* | 3151 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { |
| 2889 | * SDM 3: 25.7.1.2 | 3152 | if (vmx_nmi_enabled(vcpu)) { |
| 2890 | * Re-set bit "block by NMI" before VM entry if vmexit caused by | 3153 | vcpu->arch.nmi_pending = false; |
| 2891 | * a guest IRET fault. | 3154 | vcpu->arch.nmi_injected = true; |
| 2892 | */ | 3155 | } else { |
| 2893 | if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) && | 3156 | enable_intr_window(vcpu); |
| 2894 | (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8) | 3157 | return; |
| 2895 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 3158 | } |
| 2896 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | | 3159 | } |
| 2897 | GUEST_INTR_STATE_NMI); | 3160 | if (vcpu->arch.nmi_injected) { |
| 2898 | else if (vcpu->arch.nmi_pending) { | 3161 | vmx_inject_nmi(vcpu); |
| 2899 | if (vmx_nmi_enabled(vcpu)) | ||
| 2900 | vmx_inject_nmi(vcpu); | ||
| 2901 | enable_intr_window(vcpu); | 3162 | enable_intr_window(vcpu); |
| 2902 | return; | 3163 | return; |
| 2903 | } | 3164 | } |
| 2904 | |||
| 2905 | } | 3165 | } |
| 2906 | if (!kvm_cpu_has_interrupt(vcpu)) | 3166 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { |
| 2907 | return; | 3167 | if (vmx_irq_enabled(vcpu)) |
| 2908 | if (vmx_irq_enabled(vcpu)) { | 3168 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); |
| 2909 | vector = kvm_cpu_get_interrupt(vcpu); | 3169 | else |
| 2910 | vmx_inject_irq(vcpu, vector); | 3170 | enable_irq_window(vcpu); |
| 2911 | kvm_timer_intr_post(vcpu, vector); | 3171 | } |
| 2912 | } else | 3172 | if (vcpu->arch.interrupt.pending) { |
| 2913 | enable_irq_window(vcpu); | 3173 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); |
| 3174 | kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); | ||
| 3175 | } | ||
| 2914 | } | 3176 | } |
| 2915 | 3177 | ||
| 2916 | /* | 3178 | /* |
| @@ -2922,9 +3184,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
| 2922 | static void fixup_rmode_irq(struct vcpu_vmx *vmx) | 3184 | static void fixup_rmode_irq(struct vcpu_vmx *vmx) |
| 2923 | { | 3185 | { |
| 2924 | vmx->rmode.irq.pending = 0; | 3186 | vmx->rmode.irq.pending = 0; |
| 2925 | if (vmcs_readl(GUEST_RIP) + 1 != vmx->rmode.irq.rip) | 3187 | if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip) |
| 2926 | return; | 3188 | return; |
| 2927 | vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip); | 3189 | kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip); |
| 2928 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | 3190 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { |
| 2929 | vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; | 3191 | vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; |
| 2930 | vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; | 3192 | vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; |
| @@ -2936,11 +3198,30 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
| 2936 | | vmx->rmode.irq.vector; | 3198 | | vmx->rmode.irq.vector; |
| 2937 | } | 3199 | } |
| 2938 | 3200 | ||
| 3201 | #ifdef CONFIG_X86_64 | ||
| 3202 | #define R "r" | ||
| 3203 | #define Q "q" | ||
| 3204 | #else | ||
| 3205 | #define R "e" | ||
| 3206 | #define Q "l" | ||
| 3207 | #endif | ||
| 3208 | |||
| 2939 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3209 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2940 | { | 3210 | { |
| 2941 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3211 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2942 | u32 intr_info; | 3212 | u32 intr_info; |
| 2943 | 3213 | ||
| 3214 | /* Handle invalid guest state instead of entering VMX */ | ||
| 3215 | if (vmx->emulation_required && emulate_invalid_guest_state) { | ||
| 3216 | handle_invalid_guest_state(vcpu, kvm_run); | ||
| 3217 | return; | ||
| 3218 | } | ||
| 3219 | |||
| 3220 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | ||
| 3221 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | ||
| 3222 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | ||
| 3223 | vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); | ||
| 3224 | |||
| 2944 | /* | 3225 | /* |
| 2945 | * Loading guest fpu may have cleared host cr0.ts | 3226 | * Loading guest fpu may have cleared host cr0.ts |
| 2946 | */ | 3227 | */ |
| @@ -2948,26 +3229,25 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2948 | 3229 | ||
| 2949 | asm( | 3230 | asm( |
| 2950 | /* Store host registers */ | 3231 | /* Store host registers */ |
| 2951 | #ifdef CONFIG_X86_64 | 3232 | "push %%"R"dx; push %%"R"bp;" |
| 2952 | "push %%rdx; push %%rbp;" | 3233 | "push %%"R"cx \n\t" |
| 2953 | "push %%rcx \n\t" | 3234 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" |
| 2954 | #else | 3235 | "je 1f \n\t" |
| 2955 | "push %%edx; push %%ebp;" | 3236 | "mov %%"R"sp, %c[host_rsp](%0) \n\t" |
| 2956 | "push %%ecx \n\t" | ||
| 2957 | #endif | ||
| 2958 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" | 3237 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
| 3238 | "1: \n\t" | ||
| 2959 | /* Check if vmlaunch of vmresume is needed */ | 3239 | /* Check if vmlaunch of vmresume is needed */ |
| 2960 | "cmpl $0, %c[launched](%0) \n\t" | 3240 | "cmpl $0, %c[launched](%0) \n\t" |
| 2961 | /* Load guest registers. Don't clobber flags. */ | 3241 | /* Load guest registers. Don't clobber flags. */ |
| 3242 | "mov %c[cr2](%0), %%"R"ax \n\t" | ||
| 3243 | "mov %%"R"ax, %%cr2 \n\t" | ||
| 3244 | "mov %c[rax](%0), %%"R"ax \n\t" | ||
| 3245 | "mov %c[rbx](%0), %%"R"bx \n\t" | ||
| 3246 | "mov %c[rdx](%0), %%"R"dx \n\t" | ||
| 3247 | "mov %c[rsi](%0), %%"R"si \n\t" | ||
| 3248 | "mov %c[rdi](%0), %%"R"di \n\t" | ||
| 3249 | "mov %c[rbp](%0), %%"R"bp \n\t" | ||
| 2962 | #ifdef CONFIG_X86_64 | 3250 | #ifdef CONFIG_X86_64 |
| 2963 | "mov %c[cr2](%0), %%rax \n\t" | ||
| 2964 | "mov %%rax, %%cr2 \n\t" | ||
| 2965 | "mov %c[rax](%0), %%rax \n\t" | ||
| 2966 | "mov %c[rbx](%0), %%rbx \n\t" | ||
| 2967 | "mov %c[rdx](%0), %%rdx \n\t" | ||
| 2968 | "mov %c[rsi](%0), %%rsi \n\t" | ||
| 2969 | "mov %c[rdi](%0), %%rdi \n\t" | ||
| 2970 | "mov %c[rbp](%0), %%rbp \n\t" | ||
| 2971 | "mov %c[r8](%0), %%r8 \n\t" | 3251 | "mov %c[r8](%0), %%r8 \n\t" |
| 2972 | "mov %c[r9](%0), %%r9 \n\t" | 3252 | "mov %c[r9](%0), %%r9 \n\t" |
| 2973 | "mov %c[r10](%0), %%r10 \n\t" | 3253 | "mov %c[r10](%0), %%r10 \n\t" |
| @@ -2976,18 +3256,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2976 | "mov %c[r13](%0), %%r13 \n\t" | 3256 | "mov %c[r13](%0), %%r13 \n\t" |
| 2977 | "mov %c[r14](%0), %%r14 \n\t" | 3257 | "mov %c[r14](%0), %%r14 \n\t" |
| 2978 | "mov %c[r15](%0), %%r15 \n\t" | 3258 | "mov %c[r15](%0), %%r15 \n\t" |
| 2979 | "mov %c[rcx](%0), %%rcx \n\t" /* kills %0 (rcx) */ | ||
| 2980 | #else | ||
| 2981 | "mov %c[cr2](%0), %%eax \n\t" | ||
| 2982 | "mov %%eax, %%cr2 \n\t" | ||
| 2983 | "mov %c[rax](%0), %%eax \n\t" | ||
| 2984 | "mov %c[rbx](%0), %%ebx \n\t" | ||
| 2985 | "mov %c[rdx](%0), %%edx \n\t" | ||
| 2986 | "mov %c[rsi](%0), %%esi \n\t" | ||
| 2987 | "mov %c[rdi](%0), %%edi \n\t" | ||
| 2988 | "mov %c[rbp](%0), %%ebp \n\t" | ||
| 2989 | "mov %c[rcx](%0), %%ecx \n\t" /* kills %0 (ecx) */ | ||
| 2990 | #endif | 3259 | #endif |
| 3260 | "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ | ||
| 3261 | |||
| 2991 | /* Enter guest mode */ | 3262 | /* Enter guest mode */ |
| 2992 | "jne .Llaunched \n\t" | 3263 | "jne .Llaunched \n\t" |
| 2993 | __ex(ASM_VMX_VMLAUNCH) "\n\t" | 3264 | __ex(ASM_VMX_VMLAUNCH) "\n\t" |
| @@ -2995,15 +3266,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2995 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 3266 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
| 2996 | ".Lkvm_vmx_return: " | 3267 | ".Lkvm_vmx_return: " |
| 2997 | /* Save guest registers, load host registers, keep flags */ | 3268 | /* Save guest registers, load host registers, keep flags */ |
| 3269 | "xchg %0, (%%"R"sp) \n\t" | ||
| 3270 | "mov %%"R"ax, %c[rax](%0) \n\t" | ||
| 3271 | "mov %%"R"bx, %c[rbx](%0) \n\t" | ||
| 3272 | "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" | ||
| 3273 | "mov %%"R"dx, %c[rdx](%0) \n\t" | ||
| 3274 | "mov %%"R"si, %c[rsi](%0) \n\t" | ||
| 3275 | "mov %%"R"di, %c[rdi](%0) \n\t" | ||
| 3276 | "mov %%"R"bp, %c[rbp](%0) \n\t" | ||
| 2998 | #ifdef CONFIG_X86_64 | 3277 | #ifdef CONFIG_X86_64 |
| 2999 | "xchg %0, (%%rsp) \n\t" | ||
| 3000 | "mov %%rax, %c[rax](%0) \n\t" | ||
| 3001 | "mov %%rbx, %c[rbx](%0) \n\t" | ||
| 3002 | "pushq (%%rsp); popq %c[rcx](%0) \n\t" | ||
| 3003 | "mov %%rdx, %c[rdx](%0) \n\t" | ||
| 3004 | "mov %%rsi, %c[rsi](%0) \n\t" | ||
| 3005 | "mov %%rdi, %c[rdi](%0) \n\t" | ||
| 3006 | "mov %%rbp, %c[rbp](%0) \n\t" | ||
| 3007 | "mov %%r8, %c[r8](%0) \n\t" | 3278 | "mov %%r8, %c[r8](%0) \n\t" |
| 3008 | "mov %%r9, %c[r9](%0) \n\t" | 3279 | "mov %%r9, %c[r9](%0) \n\t" |
| 3009 | "mov %%r10, %c[r10](%0) \n\t" | 3280 | "mov %%r10, %c[r10](%0) \n\t" |
| @@ -3012,28 +3283,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3012 | "mov %%r13, %c[r13](%0) \n\t" | 3283 | "mov %%r13, %c[r13](%0) \n\t" |
| 3013 | "mov %%r14, %c[r14](%0) \n\t" | 3284 | "mov %%r14, %c[r14](%0) \n\t" |
| 3014 | "mov %%r15, %c[r15](%0) \n\t" | 3285 | "mov %%r15, %c[r15](%0) \n\t" |
| 3015 | "mov %%cr2, %%rax \n\t" | ||
| 3016 | "mov %%rax, %c[cr2](%0) \n\t" | ||
| 3017 | |||
| 3018 | "pop %%rbp; pop %%rbp; pop %%rdx \n\t" | ||
| 3019 | #else | ||
| 3020 | "xchg %0, (%%esp) \n\t" | ||
| 3021 | "mov %%eax, %c[rax](%0) \n\t" | ||
| 3022 | "mov %%ebx, %c[rbx](%0) \n\t" | ||
| 3023 | "pushl (%%esp); popl %c[rcx](%0) \n\t" | ||
| 3024 | "mov %%edx, %c[rdx](%0) \n\t" | ||
| 3025 | "mov %%esi, %c[rsi](%0) \n\t" | ||
| 3026 | "mov %%edi, %c[rdi](%0) \n\t" | ||
| 3027 | "mov %%ebp, %c[rbp](%0) \n\t" | ||
| 3028 | "mov %%cr2, %%eax \n\t" | ||
| 3029 | "mov %%eax, %c[cr2](%0) \n\t" | ||
| 3030 | |||
| 3031 | "pop %%ebp; pop %%ebp; pop %%edx \n\t" | ||
| 3032 | #endif | 3286 | #endif |
| 3287 | "mov %%cr2, %%"R"ax \n\t" | ||
| 3288 | "mov %%"R"ax, %c[cr2](%0) \n\t" | ||
| 3289 | |||
| 3290 | "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" | ||
| 3033 | "setbe %c[fail](%0) \n\t" | 3291 | "setbe %c[fail](%0) \n\t" |
| 3034 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 3292 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
| 3035 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 3293 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), |
| 3036 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 3294 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), |
| 3295 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | ||
| 3037 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), | 3296 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), |
| 3038 | [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), | 3297 | [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), |
| 3039 | [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), | 3298 | [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), |
| @@ -3053,14 +3312,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3053 | #endif | 3312 | #endif |
| 3054 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) | 3313 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) |
| 3055 | : "cc", "memory" | 3314 | : "cc", "memory" |
| 3315 | , R"bx", R"di", R"si" | ||
| 3056 | #ifdef CONFIG_X86_64 | 3316 | #ifdef CONFIG_X86_64 |
| 3057 | , "rbx", "rdi", "rsi" | ||
| 3058 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | 3317 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" |
| 3059 | #else | ||
| 3060 | , "ebx", "edi", "rsi" | ||
| 3061 | #endif | 3318 | #endif |
| 3062 | ); | 3319 | ); |
| 3063 | 3320 | ||
| 3321 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | ||
| 3322 | vcpu->arch.regs_dirty = 0; | ||
| 3323 | |||
| 3064 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3324 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
| 3065 | if (vmx->rmode.irq.pending) | 3325 | if (vmx->rmode.irq.pending) |
| 3066 | fixup_rmode_irq(vmx); | 3326 | fixup_rmode_irq(vmx); |
| @@ -3080,8 +3340,13 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3080 | KVMTRACE_0D(NMI, vcpu, handler); | 3340 | KVMTRACE_0D(NMI, vcpu, handler); |
| 3081 | asm("int $2"); | 3341 | asm("int $2"); |
| 3082 | } | 3342 | } |
| 3343 | |||
| 3344 | vmx_complete_interrupts(vmx); | ||
| 3083 | } | 3345 | } |
| 3084 | 3346 | ||
| 3347 | #undef R | ||
| 3348 | #undef Q | ||
| 3349 | |||
| 3085 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | 3350 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) |
| 3086 | { | 3351 | { |
| 3087 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3352 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -3224,8 +3489,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 3224 | .set_idt = vmx_set_idt, | 3489 | .set_idt = vmx_set_idt, |
| 3225 | .get_gdt = vmx_get_gdt, | 3490 | .get_gdt = vmx_get_gdt, |
| 3226 | .set_gdt = vmx_set_gdt, | 3491 | .set_gdt = vmx_set_gdt, |
| 3227 | .cache_regs = vcpu_load_rsp_rip, | 3492 | .cache_reg = vmx_cache_reg, |
| 3228 | .decache_regs = vcpu_put_rsp_rip, | ||
| 3229 | .get_rflags = vmx_get_rflags, | 3493 | .get_rflags = vmx_get_rflags, |
| 3230 | .set_rflags = vmx_set_rflags, | 3494 | .set_rflags = vmx_set_rflags, |
| 3231 | 3495 | ||
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 17e25995b65b..3e010d21fdd7 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h | |||
| @@ -331,9 +331,6 @@ enum vmcs_field { | |||
| 331 | 331 | ||
| 332 | #define AR_RESERVD_MASK 0xfffe0f00 | 332 | #define AR_RESERVD_MASK 0xfffe0f00 |
| 333 | 333 | ||
| 334 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 | ||
| 335 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 | ||
| 336 | |||
| 337 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 334 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 |
| 338 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | 335 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 |
| 339 | 336 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19afbb644c7f..4f0677d1eae8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -4,10 +4,14 @@ | |||
| 4 | * derived from drivers/kvm/kvm_main.c | 4 | * derived from drivers/kvm/kvm_main.c |
| 5 | * | 5 | * |
| 6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
| 7 | * Copyright (C) 2008 Qumranet, Inc. | ||
| 8 | * Copyright IBM Corporation, 2008 | ||
| 7 | * | 9 | * |
| 8 | * Authors: | 10 | * Authors: |
| 9 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
| 10 | * Yaniv Kamay <yaniv@qumranet.com> | 12 | * Yaniv Kamay <yaniv@qumranet.com> |
| 13 | * Amit Shah <amit.shah@qumranet.com> | ||
| 14 | * Ben-Ami Yassour <benami@il.ibm.com> | ||
| 11 | * | 15 | * |
| 12 | * This work is licensed under the terms of the GNU GPL, version 2. See | 16 | * This work is licensed under the terms of the GNU GPL, version 2. See |
| 13 | * the COPYING file in the top-level directory. | 17 | * the COPYING file in the top-level directory. |
| @@ -19,14 +23,18 @@ | |||
| 19 | #include "mmu.h" | 23 | #include "mmu.h" |
| 20 | #include "i8254.h" | 24 | #include "i8254.h" |
| 21 | #include "tss.h" | 25 | #include "tss.h" |
| 26 | #include "kvm_cache_regs.h" | ||
| 27 | #include "x86.h" | ||
| 22 | 28 | ||
| 23 | #include <linux/clocksource.h> | 29 | #include <linux/clocksource.h> |
| 30 | #include <linux/interrupt.h> | ||
| 24 | #include <linux/kvm.h> | 31 | #include <linux/kvm.h> |
| 25 | #include <linux/fs.h> | 32 | #include <linux/fs.h> |
| 26 | #include <linux/vmalloc.h> | 33 | #include <linux/vmalloc.h> |
| 27 | #include <linux/module.h> | 34 | #include <linux/module.h> |
| 28 | #include <linux/mman.h> | 35 | #include <linux/mman.h> |
| 29 | #include <linux/highmem.h> | 36 | #include <linux/highmem.h> |
| 37 | #include <linux/intel-iommu.h> | ||
| 30 | 38 | ||
| 31 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
| 32 | #include <asm/msr.h> | 40 | #include <asm/msr.h> |
| @@ -61,6 +69,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
| 61 | struct kvm_cpuid_entry2 __user *entries); | 69 | struct kvm_cpuid_entry2 __user *entries); |
| 62 | 70 | ||
| 63 | struct kvm_x86_ops *kvm_x86_ops; | 71 | struct kvm_x86_ops *kvm_x86_ops; |
| 72 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | ||
| 64 | 73 | ||
| 65 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 74 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
| 66 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 75 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
| @@ -83,6 +92,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 83 | { "fpu_reload", VCPU_STAT(fpu_reload) }, | 92 | { "fpu_reload", VCPU_STAT(fpu_reload) }, |
| 84 | { "insn_emulation", VCPU_STAT(insn_emulation) }, | 93 | { "insn_emulation", VCPU_STAT(insn_emulation) }, |
| 85 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, | 94 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, |
| 95 | { "irq_injections", VCPU_STAT(irq_injections) }, | ||
| 86 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, | 96 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, |
| 87 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, | 97 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, |
| 88 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, | 98 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, |
| @@ -90,12 +100,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 90 | { "mmu_flooded", VM_STAT(mmu_flooded) }, | 100 | { "mmu_flooded", VM_STAT(mmu_flooded) }, |
| 91 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 101 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
| 92 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 102 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
| 103 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | ||
| 93 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 104 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
| 94 | { "largepages", VM_STAT(lpages) }, | 105 | { "largepages", VM_STAT(lpages) }, |
| 95 | { NULL } | 106 | { NULL } |
| 96 | }; | 107 | }; |
| 97 | 108 | ||
| 98 | |||
| 99 | unsigned long segment_base(u16 selector) | 109 | unsigned long segment_base(u16 selector) |
| 100 | { | 110 | { |
| 101 | struct descriptor_table gdt; | 111 | struct descriptor_table gdt; |
| @@ -352,6 +362,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); | |||
| 352 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 362 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
| 353 | { | 363 | { |
| 354 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 364 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { |
| 365 | kvm_mmu_sync_roots(vcpu); | ||
| 355 | kvm_mmu_flush_tlb(vcpu); | 366 | kvm_mmu_flush_tlb(vcpu); |
| 356 | return; | 367 | return; |
| 357 | } | 368 | } |
| @@ -662,6 +673,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 662 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", | 673 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", |
| 663 | __func__, data); | 674 | __func__, data); |
| 664 | break; | 675 | break; |
| 676 | case MSR_IA32_DEBUGCTLMSR: | ||
| 677 | if (!data) { | ||
| 678 | /* We support the non-activated case already */ | ||
| 679 | break; | ||
| 680 | } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { | ||
| 681 | /* Values other than LBR and BTF are vendor-specific, | ||
| 682 | thus reserved and should throw a #GP */ | ||
| 683 | return 1; | ||
| 684 | } | ||
| 685 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | ||
| 686 | __func__, data); | ||
| 687 | break; | ||
| 665 | case MSR_IA32_UCODE_REV: | 688 | case MSR_IA32_UCODE_REV: |
| 666 | case MSR_IA32_UCODE_WRITE: | 689 | case MSR_IA32_UCODE_WRITE: |
| 667 | break; | 690 | break; |
| @@ -692,10 +715,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 692 | /* ...but clean it before doing the actual write */ | 715 | /* ...but clean it before doing the actual write */ |
| 693 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); | 716 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); |
| 694 | 717 | ||
| 695 | down_read(&current->mm->mmap_sem); | ||
| 696 | vcpu->arch.time_page = | 718 | vcpu->arch.time_page = |
| 697 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | 719 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); |
| 698 | up_read(&current->mm->mmap_sem); | ||
| 699 | 720 | ||
| 700 | if (is_error_page(vcpu->arch.time_page)) { | 721 | if (is_error_page(vcpu->arch.time_page)) { |
| 701 | kvm_release_page_clean(vcpu->arch.time_page); | 722 | kvm_release_page_clean(vcpu->arch.time_page); |
| @@ -752,8 +773,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 752 | case MSR_IA32_MC0_MISC+8: | 773 | case MSR_IA32_MC0_MISC+8: |
| 753 | case MSR_IA32_MC0_MISC+12: | 774 | case MSR_IA32_MC0_MISC+12: |
| 754 | case MSR_IA32_MC0_MISC+16: | 775 | case MSR_IA32_MC0_MISC+16: |
| 776 | case MSR_IA32_MC0_MISC+20: | ||
| 755 | case MSR_IA32_UCODE_REV: | 777 | case MSR_IA32_UCODE_REV: |
| 756 | case MSR_IA32_EBL_CR_POWERON: | 778 | case MSR_IA32_EBL_CR_POWERON: |
| 779 | case MSR_IA32_DEBUGCTLMSR: | ||
| 780 | case MSR_IA32_LASTBRANCHFROMIP: | ||
| 781 | case MSR_IA32_LASTBRANCHTOIP: | ||
| 782 | case MSR_IA32_LASTINTFROMIP: | ||
| 783 | case MSR_IA32_LASTINTTOIP: | ||
| 757 | data = 0; | 784 | data = 0; |
| 758 | break; | 785 | break; |
| 759 | case MSR_MTRRcap: | 786 | case MSR_MTRRcap: |
| @@ -901,6 +928,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 901 | case KVM_CAP_PV_MMU: | 928 | case KVM_CAP_PV_MMU: |
| 902 | r = !tdp_enabled; | 929 | r = !tdp_enabled; |
| 903 | break; | 930 | break; |
| 931 | case KVM_CAP_IOMMU: | ||
| 932 | r = intel_iommu_found(); | ||
| 933 | break; | ||
| 904 | default: | 934 | default: |
| 905 | r = 0; | 935 | r = 0; |
| 906 | break; | 936 | break; |
| @@ -1303,28 +1333,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1303 | struct kvm_vcpu *vcpu = filp->private_data; | 1333 | struct kvm_vcpu *vcpu = filp->private_data; |
| 1304 | void __user *argp = (void __user *)arg; | 1334 | void __user *argp = (void __user *)arg; |
| 1305 | int r; | 1335 | int r; |
| 1336 | struct kvm_lapic_state *lapic = NULL; | ||
| 1306 | 1337 | ||
| 1307 | switch (ioctl) { | 1338 | switch (ioctl) { |
| 1308 | case KVM_GET_LAPIC: { | 1339 | case KVM_GET_LAPIC: { |
| 1309 | struct kvm_lapic_state lapic; | 1340 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 1310 | 1341 | ||
| 1311 | memset(&lapic, 0, sizeof lapic); | 1342 | r = -ENOMEM; |
| 1312 | r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); | 1343 | if (!lapic) |
| 1344 | goto out; | ||
| 1345 | r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); | ||
| 1313 | if (r) | 1346 | if (r) |
| 1314 | goto out; | 1347 | goto out; |
| 1315 | r = -EFAULT; | 1348 | r = -EFAULT; |
| 1316 | if (copy_to_user(argp, &lapic, sizeof lapic)) | 1349 | if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) |
| 1317 | goto out; | 1350 | goto out; |
| 1318 | r = 0; | 1351 | r = 0; |
| 1319 | break; | 1352 | break; |
| 1320 | } | 1353 | } |
| 1321 | case KVM_SET_LAPIC: { | 1354 | case KVM_SET_LAPIC: { |
| 1322 | struct kvm_lapic_state lapic; | 1355 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 1323 | 1356 | r = -ENOMEM; | |
| 1357 | if (!lapic) | ||
| 1358 | goto out; | ||
| 1324 | r = -EFAULT; | 1359 | r = -EFAULT; |
| 1325 | if (copy_from_user(&lapic, argp, sizeof lapic)) | 1360 | if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) |
| 1326 | goto out; | 1361 | goto out; |
| 1327 | r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; | 1362 | r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); |
| 1328 | if (r) | 1363 | if (r) |
| 1329 | goto out; | 1364 | goto out; |
| 1330 | r = 0; | 1365 | r = 0; |
| @@ -1422,6 +1457,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1422 | r = -EINVAL; | 1457 | r = -EINVAL; |
| 1423 | } | 1458 | } |
| 1424 | out: | 1459 | out: |
| 1460 | if (lapic) | ||
| 1461 | kfree(lapic); | ||
| 1425 | return r; | 1462 | return r; |
| 1426 | } | 1463 | } |
| 1427 | 1464 | ||
| @@ -1630,6 +1667,15 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1630 | struct kvm *kvm = filp->private_data; | 1667 | struct kvm *kvm = filp->private_data; |
| 1631 | void __user *argp = (void __user *)arg; | 1668 | void __user *argp = (void __user *)arg; |
| 1632 | int r = -EINVAL; | 1669 | int r = -EINVAL; |
| 1670 | /* | ||
| 1671 | * This union makes it completely explicit to gcc-3.x | ||
| 1672 | * that these two variables' stack usage should be | ||
| 1673 | * combined, not added together. | ||
| 1674 | */ | ||
| 1675 | union { | ||
| 1676 | struct kvm_pit_state ps; | ||
| 1677 | struct kvm_memory_alias alias; | ||
| 1678 | } u; | ||
| 1633 | 1679 | ||
| 1634 | switch (ioctl) { | 1680 | switch (ioctl) { |
| 1635 | case KVM_SET_TSS_ADDR: | 1681 | case KVM_SET_TSS_ADDR: |
| @@ -1661,17 +1707,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1661 | case KVM_GET_NR_MMU_PAGES: | 1707 | case KVM_GET_NR_MMU_PAGES: |
| 1662 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | 1708 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); |
| 1663 | break; | 1709 | break; |
| 1664 | case KVM_SET_MEMORY_ALIAS: { | 1710 | case KVM_SET_MEMORY_ALIAS: |
| 1665 | struct kvm_memory_alias alias; | ||
| 1666 | |||
| 1667 | r = -EFAULT; | 1711 | r = -EFAULT; |
| 1668 | if (copy_from_user(&alias, argp, sizeof alias)) | 1712 | if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) |
| 1669 | goto out; | 1713 | goto out; |
| 1670 | r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); | 1714 | r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); |
| 1671 | if (r) | 1715 | if (r) |
| 1672 | goto out; | 1716 | goto out; |
| 1673 | break; | 1717 | break; |
| 1674 | } | ||
| 1675 | case KVM_CREATE_IRQCHIP: | 1718 | case KVM_CREATE_IRQCHIP: |
| 1676 | r = -ENOMEM; | 1719 | r = -ENOMEM; |
| 1677 | kvm->arch.vpic = kvm_create_pic(kvm); | 1720 | kvm->arch.vpic = kvm_create_pic(kvm); |
| @@ -1699,13 +1742,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1699 | goto out; | 1742 | goto out; |
| 1700 | if (irqchip_in_kernel(kvm)) { | 1743 | if (irqchip_in_kernel(kvm)) { |
| 1701 | mutex_lock(&kvm->lock); | 1744 | mutex_lock(&kvm->lock); |
| 1702 | if (irq_event.irq < 16) | 1745 | kvm_set_irq(kvm, irq_event.irq, irq_event.level); |
| 1703 | kvm_pic_set_irq(pic_irqchip(kvm), | ||
| 1704 | irq_event.irq, | ||
| 1705 | irq_event.level); | ||
| 1706 | kvm_ioapic_set_irq(kvm->arch.vioapic, | ||
| 1707 | irq_event.irq, | ||
| 1708 | irq_event.level); | ||
| 1709 | mutex_unlock(&kvm->lock); | 1746 | mutex_unlock(&kvm->lock); |
| 1710 | r = 0; | 1747 | r = 0; |
| 1711 | } | 1748 | } |
| @@ -1713,65 +1750,77 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1713 | } | 1750 | } |
| 1714 | case KVM_GET_IRQCHIP: { | 1751 | case KVM_GET_IRQCHIP: { |
| 1715 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 1752 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
| 1716 | struct kvm_irqchip chip; | 1753 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); |
| 1717 | 1754 | ||
| 1718 | r = -EFAULT; | 1755 | r = -ENOMEM; |
| 1719 | if (copy_from_user(&chip, argp, sizeof chip)) | 1756 | if (!chip) |
| 1720 | goto out; | 1757 | goto out; |
| 1758 | r = -EFAULT; | ||
| 1759 | if (copy_from_user(chip, argp, sizeof *chip)) | ||
| 1760 | goto get_irqchip_out; | ||
| 1721 | r = -ENXIO; | 1761 | r = -ENXIO; |
| 1722 | if (!irqchip_in_kernel(kvm)) | 1762 | if (!irqchip_in_kernel(kvm)) |
| 1723 | goto out; | 1763 | goto get_irqchip_out; |
| 1724 | r = kvm_vm_ioctl_get_irqchip(kvm, &chip); | 1764 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); |
| 1725 | if (r) | 1765 | if (r) |
| 1726 | goto out; | 1766 | goto get_irqchip_out; |
| 1727 | r = -EFAULT; | 1767 | r = -EFAULT; |
| 1728 | if (copy_to_user(argp, &chip, sizeof chip)) | 1768 | if (copy_to_user(argp, chip, sizeof *chip)) |
| 1729 | goto out; | 1769 | goto get_irqchip_out; |
| 1730 | r = 0; | 1770 | r = 0; |
| 1771 | get_irqchip_out: | ||
| 1772 | kfree(chip); | ||
| 1773 | if (r) | ||
| 1774 | goto out; | ||
| 1731 | break; | 1775 | break; |
| 1732 | } | 1776 | } |
| 1733 | case KVM_SET_IRQCHIP: { | 1777 | case KVM_SET_IRQCHIP: { |
| 1734 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 1778 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
| 1735 | struct kvm_irqchip chip; | 1779 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); |
| 1736 | 1780 | ||
| 1737 | r = -EFAULT; | 1781 | r = -ENOMEM; |
| 1738 | if (copy_from_user(&chip, argp, sizeof chip)) | 1782 | if (!chip) |
| 1739 | goto out; | 1783 | goto out; |
| 1784 | r = -EFAULT; | ||
| 1785 | if (copy_from_user(chip, argp, sizeof *chip)) | ||
| 1786 | goto set_irqchip_out; | ||
| 1740 | r = -ENXIO; | 1787 | r = -ENXIO; |
| 1741 | if (!irqchip_in_kernel(kvm)) | 1788 | if (!irqchip_in_kernel(kvm)) |
| 1742 | goto out; | 1789 | goto set_irqchip_out; |
| 1743 | r = kvm_vm_ioctl_set_irqchip(kvm, &chip); | 1790 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); |
| 1744 | if (r) | 1791 | if (r) |
| 1745 | goto out; | 1792 | goto set_irqchip_out; |
| 1746 | r = 0; | 1793 | r = 0; |
| 1794 | set_irqchip_out: | ||
| 1795 | kfree(chip); | ||
| 1796 | if (r) | ||
| 1797 | goto out; | ||
| 1747 | break; | 1798 | break; |
| 1748 | } | 1799 | } |
| 1749 | case KVM_GET_PIT: { | 1800 | case KVM_GET_PIT: { |
| 1750 | struct kvm_pit_state ps; | ||
| 1751 | r = -EFAULT; | 1801 | r = -EFAULT; |
| 1752 | if (copy_from_user(&ps, argp, sizeof ps)) | 1802 | if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) |
| 1753 | goto out; | 1803 | goto out; |
| 1754 | r = -ENXIO; | 1804 | r = -ENXIO; |
| 1755 | if (!kvm->arch.vpit) | 1805 | if (!kvm->arch.vpit) |
| 1756 | goto out; | 1806 | goto out; |
| 1757 | r = kvm_vm_ioctl_get_pit(kvm, &ps); | 1807 | r = kvm_vm_ioctl_get_pit(kvm, &u.ps); |
| 1758 | if (r) | 1808 | if (r) |
| 1759 | goto out; | 1809 | goto out; |
| 1760 | r = -EFAULT; | 1810 | r = -EFAULT; |
| 1761 | if (copy_to_user(argp, &ps, sizeof ps)) | 1811 | if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state))) |
| 1762 | goto out; | 1812 | goto out; |
| 1763 | r = 0; | 1813 | r = 0; |
| 1764 | break; | 1814 | break; |
| 1765 | } | 1815 | } |
| 1766 | case KVM_SET_PIT: { | 1816 | case KVM_SET_PIT: { |
| 1767 | struct kvm_pit_state ps; | ||
| 1768 | r = -EFAULT; | 1817 | r = -EFAULT; |
| 1769 | if (copy_from_user(&ps, argp, sizeof ps)) | 1818 | if (copy_from_user(&u.ps, argp, sizeof u.ps)) |
| 1770 | goto out; | 1819 | goto out; |
| 1771 | r = -ENXIO; | 1820 | r = -ENXIO; |
| 1772 | if (!kvm->arch.vpit) | 1821 | if (!kvm->arch.vpit) |
| 1773 | goto out; | 1822 | goto out; |
| 1774 | r = kvm_vm_ioctl_set_pit(kvm, &ps); | 1823 | r = kvm_vm_ioctl_set_pit(kvm, &u.ps); |
| 1775 | if (r) | 1824 | if (r) |
| 1776 | goto out; | 1825 | goto out; |
| 1777 | r = 0; | 1826 | r = 0; |
| @@ -2018,9 +2067,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
| 2018 | 2067 | ||
| 2019 | val = *(u64 *)new; | 2068 | val = *(u64 *)new; |
| 2020 | 2069 | ||
| 2021 | down_read(&current->mm->mmap_sem); | ||
| 2022 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 2070 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 2023 | up_read(&current->mm->mmap_sem); | ||
| 2024 | 2071 | ||
| 2025 | kaddr = kmap_atomic(page, KM_USER0); | 2072 | kaddr = kmap_atomic(page, KM_USER0); |
| 2026 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 2073 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); |
| @@ -2040,6 +2087,7 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
| 2040 | 2087 | ||
| 2041 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 2088 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) |
| 2042 | { | 2089 | { |
| 2090 | kvm_mmu_invlpg(vcpu, address); | ||
| 2043 | return X86EMUL_CONTINUE; | 2091 | return X86EMUL_CONTINUE; |
| 2044 | } | 2092 | } |
| 2045 | 2093 | ||
| @@ -2080,7 +2128,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
| 2080 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 2128 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
| 2081 | { | 2129 | { |
| 2082 | u8 opcodes[4]; | 2130 | u8 opcodes[4]; |
| 2083 | unsigned long rip = vcpu->arch.rip; | 2131 | unsigned long rip = kvm_rip_read(vcpu); |
| 2084 | unsigned long rip_linear; | 2132 | unsigned long rip_linear; |
| 2085 | 2133 | ||
| 2086 | if (!printk_ratelimit()) | 2134 | if (!printk_ratelimit()) |
| @@ -2102,6 +2150,14 @@ static struct x86_emulate_ops emulate_ops = { | |||
| 2102 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 2150 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
| 2103 | }; | 2151 | }; |
| 2104 | 2152 | ||
| 2153 | static void cache_all_regs(struct kvm_vcpu *vcpu) | ||
| 2154 | { | ||
| 2155 | kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 2156 | kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
| 2157 | kvm_register_read(vcpu, VCPU_REGS_RIP); | ||
| 2158 | vcpu->arch.regs_dirty = ~0; | ||
| 2159 | } | ||
| 2160 | |||
| 2105 | int emulate_instruction(struct kvm_vcpu *vcpu, | 2161 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| 2106 | struct kvm_run *run, | 2162 | struct kvm_run *run, |
| 2107 | unsigned long cr2, | 2163 | unsigned long cr2, |
| @@ -2111,8 +2167,15 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2111 | int r; | 2167 | int r; |
| 2112 | struct decode_cache *c; | 2168 | struct decode_cache *c; |
| 2113 | 2169 | ||
| 2170 | kvm_clear_exception_queue(vcpu); | ||
| 2114 | vcpu->arch.mmio_fault_cr2 = cr2; | 2171 | vcpu->arch.mmio_fault_cr2 = cr2; |
| 2115 | kvm_x86_ops->cache_regs(vcpu); | 2172 | /* |
| 2173 | * TODO: fix x86_emulate.c to use guest_read/write_register | ||
| 2174 | * instead of direct ->regs accesses, can save hundred cycles | ||
| 2175 | * on Intel for instructions that don't read/change RSP, for | ||
| 2176 | * for example. | ||
| 2177 | */ | ||
| 2178 | cache_all_regs(vcpu); | ||
| 2116 | 2179 | ||
| 2117 | vcpu->mmio_is_write = 0; | 2180 | vcpu->mmio_is_write = 0; |
| 2118 | vcpu->arch.pio.string = 0; | 2181 | vcpu->arch.pio.string = 0; |
| @@ -2172,7 +2235,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2172 | return EMULATE_DO_MMIO; | 2235 | return EMULATE_DO_MMIO; |
| 2173 | } | 2236 | } |
| 2174 | 2237 | ||
| 2175 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2176 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 2238 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 2177 | 2239 | ||
| 2178 | if (vcpu->mmio_is_write) { | 2240 | if (vcpu->mmio_is_write) { |
| @@ -2225,20 +2287,19 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
| 2225 | struct kvm_pio_request *io = &vcpu->arch.pio; | 2287 | struct kvm_pio_request *io = &vcpu->arch.pio; |
| 2226 | long delta; | 2288 | long delta; |
| 2227 | int r; | 2289 | int r; |
| 2228 | 2290 | unsigned long val; | |
| 2229 | kvm_x86_ops->cache_regs(vcpu); | ||
| 2230 | 2291 | ||
| 2231 | if (!io->string) { | 2292 | if (!io->string) { |
| 2232 | if (io->in) | 2293 | if (io->in) { |
| 2233 | memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data, | 2294 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2234 | io->size); | 2295 | memcpy(&val, vcpu->arch.pio_data, io->size); |
| 2296 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
| 2297 | } | ||
| 2235 | } else { | 2298 | } else { |
| 2236 | if (io->in) { | 2299 | if (io->in) { |
| 2237 | r = pio_copy_data(vcpu); | 2300 | r = pio_copy_data(vcpu); |
| 2238 | if (r) { | 2301 | if (r) |
| 2239 | kvm_x86_ops->cache_regs(vcpu); | ||
| 2240 | return r; | 2302 | return r; |
| 2241 | } | ||
| 2242 | } | 2303 | } |
| 2243 | 2304 | ||
| 2244 | delta = 1; | 2305 | delta = 1; |
| @@ -2248,19 +2309,24 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
| 2248 | * The size of the register should really depend on | 2309 | * The size of the register should really depend on |
| 2249 | * current address size. | 2310 | * current address size. |
| 2250 | */ | 2311 | */ |
| 2251 | vcpu->arch.regs[VCPU_REGS_RCX] -= delta; | 2312 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 2313 | val -= delta; | ||
| 2314 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
| 2252 | } | 2315 | } |
| 2253 | if (io->down) | 2316 | if (io->down) |
| 2254 | delta = -delta; | 2317 | delta = -delta; |
| 2255 | delta *= io->size; | 2318 | delta *= io->size; |
| 2256 | if (io->in) | 2319 | if (io->in) { |
| 2257 | vcpu->arch.regs[VCPU_REGS_RDI] += delta; | 2320 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 2258 | else | 2321 | val += delta; |
| 2259 | vcpu->arch.regs[VCPU_REGS_RSI] += delta; | 2322 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); |
| 2323 | } else { | ||
| 2324 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 2325 | val += delta; | ||
| 2326 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
| 2327 | } | ||
| 2260 | } | 2328 | } |
| 2261 | 2329 | ||
| 2262 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2263 | |||
| 2264 | io->count -= io->cur_count; | 2330 | io->count -= io->cur_count; |
| 2265 | io->cur_count = 0; | 2331 | io->cur_count = 0; |
| 2266 | 2332 | ||
| @@ -2313,6 +2379,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
| 2313 | int size, unsigned port) | 2379 | int size, unsigned port) |
| 2314 | { | 2380 | { |
| 2315 | struct kvm_io_device *pio_dev; | 2381 | struct kvm_io_device *pio_dev; |
| 2382 | unsigned long val; | ||
| 2316 | 2383 | ||
| 2317 | vcpu->run->exit_reason = KVM_EXIT_IO; | 2384 | vcpu->run->exit_reason = KVM_EXIT_IO; |
| 2318 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 2385 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
| @@ -2333,8 +2400,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
| 2333 | KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, | 2400 | KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, |
| 2334 | handler); | 2401 | handler); |
| 2335 | 2402 | ||
| 2336 | kvm_x86_ops->cache_regs(vcpu); | 2403 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2337 | memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); | 2404 | memcpy(vcpu->arch.pio_data, &val, 4); |
| 2338 | 2405 | ||
| 2339 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2406 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
| 2340 | 2407 | ||
| @@ -2492,11 +2559,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
| 2492 | KVMTRACE_0D(HLT, vcpu, handler); | 2559 | KVMTRACE_0D(HLT, vcpu, handler); |
| 2493 | if (irqchip_in_kernel(vcpu->kvm)) { | 2560 | if (irqchip_in_kernel(vcpu->kvm)) { |
| 2494 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; | 2561 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; |
| 2495 | up_read(&vcpu->kvm->slots_lock); | ||
| 2496 | kvm_vcpu_block(vcpu); | ||
| 2497 | down_read(&vcpu->kvm->slots_lock); | ||
| 2498 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | ||
| 2499 | return -EINTR; | ||
| 2500 | return 1; | 2562 | return 1; |
| 2501 | } else { | 2563 | } else { |
| 2502 | vcpu->run->exit_reason = KVM_EXIT_HLT; | 2564 | vcpu->run->exit_reason = KVM_EXIT_HLT; |
| @@ -2519,13 +2581,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
| 2519 | unsigned long nr, a0, a1, a2, a3, ret; | 2581 | unsigned long nr, a0, a1, a2, a3, ret; |
| 2520 | int r = 1; | 2582 | int r = 1; |
| 2521 | 2583 | ||
| 2522 | kvm_x86_ops->cache_regs(vcpu); | 2584 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2523 | 2585 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); | |
| 2524 | nr = vcpu->arch.regs[VCPU_REGS_RAX]; | 2586 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 2525 | a0 = vcpu->arch.regs[VCPU_REGS_RBX]; | 2587 | a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 2526 | a1 = vcpu->arch.regs[VCPU_REGS_RCX]; | 2588 | a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 2527 | a2 = vcpu->arch.regs[VCPU_REGS_RDX]; | ||
| 2528 | a3 = vcpu->arch.regs[VCPU_REGS_RSI]; | ||
| 2529 | 2589 | ||
| 2530 | KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); | 2590 | KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); |
| 2531 | 2591 | ||
| @@ -2548,8 +2608,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
| 2548 | ret = -KVM_ENOSYS; | 2608 | ret = -KVM_ENOSYS; |
| 2549 | break; | 2609 | break; |
| 2550 | } | 2610 | } |
| 2551 | vcpu->arch.regs[VCPU_REGS_RAX] = ret; | 2611 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); |
| 2552 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2553 | ++vcpu->stat.hypercalls; | 2612 | ++vcpu->stat.hypercalls; |
| 2554 | return r; | 2613 | return r; |
| 2555 | } | 2614 | } |
| @@ -2559,6 +2618,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 2559 | { | 2618 | { |
| 2560 | char instruction[3]; | 2619 | char instruction[3]; |
| 2561 | int ret = 0; | 2620 | int ret = 0; |
| 2621 | unsigned long rip = kvm_rip_read(vcpu); | ||
| 2562 | 2622 | ||
| 2563 | 2623 | ||
| 2564 | /* | 2624 | /* |
| @@ -2568,9 +2628,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 2568 | */ | 2628 | */ |
| 2569 | kvm_mmu_zap_all(vcpu->kvm); | 2629 | kvm_mmu_zap_all(vcpu->kvm); |
| 2570 | 2630 | ||
| 2571 | kvm_x86_ops->cache_regs(vcpu); | ||
| 2572 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 2631 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
| 2573 | if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu) | 2632 | if (emulator_write_emulated(rip, instruction, 3, vcpu) |
| 2574 | != X86EMUL_CONTINUE) | 2633 | != X86EMUL_CONTINUE) |
| 2575 | ret = -EFAULT; | 2634 | ret = -EFAULT; |
| 2576 | 2635 | ||
| @@ -2700,13 +2759,12 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
| 2700 | u32 function, index; | 2759 | u32 function, index; |
| 2701 | struct kvm_cpuid_entry2 *e, *best; | 2760 | struct kvm_cpuid_entry2 *e, *best; |
| 2702 | 2761 | ||
| 2703 | kvm_x86_ops->cache_regs(vcpu); | 2762 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2704 | function = vcpu->arch.regs[VCPU_REGS_RAX]; | 2763 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 2705 | index = vcpu->arch.regs[VCPU_REGS_RCX]; | 2764 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); |
| 2706 | vcpu->arch.regs[VCPU_REGS_RAX] = 0; | 2765 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); |
| 2707 | vcpu->arch.regs[VCPU_REGS_RBX] = 0; | 2766 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); |
| 2708 | vcpu->arch.regs[VCPU_REGS_RCX] = 0; | 2767 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); |
| 2709 | vcpu->arch.regs[VCPU_REGS_RDX] = 0; | ||
| 2710 | best = NULL; | 2768 | best = NULL; |
| 2711 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | 2769 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { |
| 2712 | e = &vcpu->arch.cpuid_entries[i]; | 2770 | e = &vcpu->arch.cpuid_entries[i]; |
| @@ -2724,18 +2782,17 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
| 2724 | best = e; | 2782 | best = e; |
| 2725 | } | 2783 | } |
| 2726 | if (best) { | 2784 | if (best) { |
| 2727 | vcpu->arch.regs[VCPU_REGS_RAX] = best->eax; | 2785 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); |
| 2728 | vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx; | 2786 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); |
| 2729 | vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx; | 2787 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); |
| 2730 | vcpu->arch.regs[VCPU_REGS_RDX] = best->edx; | 2788 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); |
| 2731 | } | 2789 | } |
| 2732 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2733 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2790 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
| 2734 | KVMTRACE_5D(CPUID, vcpu, function, | 2791 | KVMTRACE_5D(CPUID, vcpu, function, |
| 2735 | (u32)vcpu->arch.regs[VCPU_REGS_RAX], | 2792 | (u32)kvm_register_read(vcpu, VCPU_REGS_RAX), |
| 2736 | (u32)vcpu->arch.regs[VCPU_REGS_RBX], | 2793 | (u32)kvm_register_read(vcpu, VCPU_REGS_RBX), |
| 2737 | (u32)vcpu->arch.regs[VCPU_REGS_RCX], | 2794 | (u32)kvm_register_read(vcpu, VCPU_REGS_RCX), |
| 2738 | (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler); | 2795 | (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler); |
| 2739 | } | 2796 | } |
| 2740 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 2797 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
| 2741 | 2798 | ||
| @@ -2776,9 +2833,7 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
| 2776 | if (!apic || !apic->vapic_addr) | 2833 | if (!apic || !apic->vapic_addr) |
| 2777 | return; | 2834 | return; |
| 2778 | 2835 | ||
| 2779 | down_read(&current->mm->mmap_sem); | ||
| 2780 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 2836 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
| 2781 | up_read(&current->mm->mmap_sem); | ||
| 2782 | 2837 | ||
| 2783 | vcpu->arch.apic->vapic_page = page; | 2838 | vcpu->arch.apic->vapic_page = page; |
| 2784 | } | 2839 | } |
| @@ -2796,28 +2851,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu) | |||
| 2796 | up_read(&vcpu->kvm->slots_lock); | 2851 | up_read(&vcpu->kvm->slots_lock); |
| 2797 | } | 2852 | } |
| 2798 | 2853 | ||
| 2799 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2854 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2800 | { | 2855 | { |
| 2801 | int r; | 2856 | int r; |
| 2802 | 2857 | ||
| 2803 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | ||
| 2804 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
| 2805 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | ||
| 2806 | kvm_lapic_reset(vcpu); | ||
| 2807 | r = kvm_x86_ops->vcpu_reset(vcpu); | ||
| 2808 | if (r) | ||
| 2809 | return r; | ||
| 2810 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 2811 | } | ||
| 2812 | |||
| 2813 | down_read(&vcpu->kvm->slots_lock); | ||
| 2814 | vapic_enter(vcpu); | ||
| 2815 | |||
| 2816 | preempted: | ||
| 2817 | if (vcpu->guest_debug.enabled) | ||
| 2818 | kvm_x86_ops->guest_debug_pre(vcpu); | ||
| 2819 | |||
| 2820 | again: | ||
| 2821 | if (vcpu->requests) | 2858 | if (vcpu->requests) |
| 2822 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 2859 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
| 2823 | kvm_mmu_unload(vcpu); | 2860 | kvm_mmu_unload(vcpu); |
| @@ -2829,6 +2866,8 @@ again: | |||
| 2829 | if (vcpu->requests) { | 2866 | if (vcpu->requests) { |
| 2830 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 2867 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) |
| 2831 | __kvm_migrate_timers(vcpu); | 2868 | __kvm_migrate_timers(vcpu); |
| 2869 | if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) | ||
| 2870 | kvm_mmu_sync_roots(vcpu); | ||
| 2832 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | 2871 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) |
| 2833 | kvm_x86_ops->tlb_flush(vcpu); | 2872 | kvm_x86_ops->tlb_flush(vcpu); |
| 2834 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 2873 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
| @@ -2854,21 +2893,15 @@ again: | |||
| 2854 | 2893 | ||
| 2855 | local_irq_disable(); | 2894 | local_irq_disable(); |
| 2856 | 2895 | ||
| 2857 | if (vcpu->requests || need_resched()) { | 2896 | if (vcpu->requests || need_resched() || signal_pending(current)) { |
| 2858 | local_irq_enable(); | 2897 | local_irq_enable(); |
| 2859 | preempt_enable(); | 2898 | preempt_enable(); |
| 2860 | r = 1; | 2899 | r = 1; |
| 2861 | goto out; | 2900 | goto out; |
| 2862 | } | 2901 | } |
| 2863 | 2902 | ||
| 2864 | if (signal_pending(current)) { | 2903 | if (vcpu->guest_debug.enabled) |
| 2865 | local_irq_enable(); | 2904 | kvm_x86_ops->guest_debug_pre(vcpu); |
| 2866 | preempt_enable(); | ||
| 2867 | r = -EINTR; | ||
| 2868 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 2869 | ++vcpu->stat.signal_exits; | ||
| 2870 | goto out; | ||
| 2871 | } | ||
| 2872 | 2905 | ||
| 2873 | vcpu->guest_mode = 1; | 2906 | vcpu->guest_mode = 1; |
| 2874 | /* | 2907 | /* |
| @@ -2917,8 +2950,8 @@ again: | |||
| 2917 | * Profile KVM exit RIPs: | 2950 | * Profile KVM exit RIPs: |
| 2918 | */ | 2951 | */ |
| 2919 | if (unlikely(prof_on == KVM_PROFILING)) { | 2952 | if (unlikely(prof_on == KVM_PROFILING)) { |
| 2920 | kvm_x86_ops->cache_regs(vcpu); | 2953 | unsigned long rip = kvm_rip_read(vcpu); |
| 2921 | profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip); | 2954 | profile_hit(KVM_PROFILING, (void *)rip); |
| 2922 | } | 2955 | } |
| 2923 | 2956 | ||
| 2924 | if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) | 2957 | if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) |
| @@ -2927,26 +2960,63 @@ again: | |||
| 2927 | kvm_lapic_sync_from_vapic(vcpu); | 2960 | kvm_lapic_sync_from_vapic(vcpu); |
| 2928 | 2961 | ||
| 2929 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | 2962 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); |
| 2963 | out: | ||
| 2964 | return r; | ||
| 2965 | } | ||
| 2930 | 2966 | ||
| 2931 | if (r > 0) { | 2967 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2932 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 2968 | { |
| 2933 | r = -EINTR; | 2969 | int r; |
| 2934 | kvm_run->exit_reason = KVM_EXIT_INTR; | 2970 | |
| 2935 | ++vcpu->stat.request_irq_exits; | 2971 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
| 2936 | goto out; | 2972 | pr_debug("vcpu %d received sipi with vector # %x\n", |
| 2937 | } | 2973 | vcpu->vcpu_id, vcpu->arch.sipi_vector); |
| 2938 | if (!need_resched()) | 2974 | kvm_lapic_reset(vcpu); |
| 2939 | goto again; | 2975 | r = kvm_x86_ops->vcpu_reset(vcpu); |
| 2976 | if (r) | ||
| 2977 | return r; | ||
| 2978 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 2940 | } | 2979 | } |
| 2941 | 2980 | ||
| 2942 | out: | 2981 | down_read(&vcpu->kvm->slots_lock); |
| 2943 | up_read(&vcpu->kvm->slots_lock); | 2982 | vapic_enter(vcpu); |
| 2944 | if (r > 0) { | 2983 | |
| 2945 | kvm_resched(vcpu); | 2984 | r = 1; |
| 2946 | down_read(&vcpu->kvm->slots_lock); | 2985 | while (r > 0) { |
| 2947 | goto preempted; | 2986 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
| 2987 | r = vcpu_enter_guest(vcpu, kvm_run); | ||
| 2988 | else { | ||
| 2989 | up_read(&vcpu->kvm->slots_lock); | ||
| 2990 | kvm_vcpu_block(vcpu); | ||
| 2991 | down_read(&vcpu->kvm->slots_lock); | ||
| 2992 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | ||
| 2993 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | ||
| 2994 | vcpu->arch.mp_state = | ||
| 2995 | KVM_MP_STATE_RUNNABLE; | ||
| 2996 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | ||
| 2997 | r = -EINTR; | ||
| 2998 | } | ||
| 2999 | |||
| 3000 | if (r > 0) { | ||
| 3001 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | ||
| 3002 | r = -EINTR; | ||
| 3003 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 3004 | ++vcpu->stat.request_irq_exits; | ||
| 3005 | } | ||
| 3006 | if (signal_pending(current)) { | ||
| 3007 | r = -EINTR; | ||
| 3008 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 3009 | ++vcpu->stat.signal_exits; | ||
| 3010 | } | ||
| 3011 | if (need_resched()) { | ||
| 3012 | up_read(&vcpu->kvm->slots_lock); | ||
| 3013 | kvm_resched(vcpu); | ||
| 3014 | down_read(&vcpu->kvm->slots_lock); | ||
| 3015 | } | ||
| 3016 | } | ||
| 2948 | } | 3017 | } |
| 2949 | 3018 | ||
| 3019 | up_read(&vcpu->kvm->slots_lock); | ||
| 2950 | post_kvm_run_save(vcpu, kvm_run); | 3020 | post_kvm_run_save(vcpu, kvm_run); |
| 2951 | 3021 | ||
| 2952 | vapic_exit(vcpu); | 3022 | vapic_exit(vcpu); |
| @@ -2966,6 +3036,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2966 | 3036 | ||
| 2967 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | 3037 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
| 2968 | kvm_vcpu_block(vcpu); | 3038 | kvm_vcpu_block(vcpu); |
| 3039 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | ||
| 2969 | r = -EAGAIN; | 3040 | r = -EAGAIN; |
| 2970 | goto out; | 3041 | goto out; |
| 2971 | } | 3042 | } |
| @@ -2999,11 +3070,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2999 | } | 3070 | } |
| 3000 | } | 3071 | } |
| 3001 | #endif | 3072 | #endif |
| 3002 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { | 3073 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
| 3003 | kvm_x86_ops->cache_regs(vcpu); | 3074 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
| 3004 | vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; | 3075 | kvm_run->hypercall.ret); |
| 3005 | kvm_x86_ops->decache_regs(vcpu); | ||
| 3006 | } | ||
| 3007 | 3076 | ||
| 3008 | r = __vcpu_run(vcpu, kvm_run); | 3077 | r = __vcpu_run(vcpu, kvm_run); |
| 3009 | 3078 | ||
| @@ -3019,28 +3088,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 3019 | { | 3088 | { |
| 3020 | vcpu_load(vcpu); | 3089 | vcpu_load(vcpu); |
| 3021 | 3090 | ||
| 3022 | kvm_x86_ops->cache_regs(vcpu); | 3091 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3023 | 3092 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | |
| 3024 | regs->rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3093 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3025 | regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX]; | 3094 | regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 3026 | regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3095 | regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 3027 | regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX]; | 3096 | regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 3028 | regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI]; | 3097 | regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
| 3029 | regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI]; | 3098 | regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); |
| 3030 | regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 3031 | regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP]; | ||
| 3032 | #ifdef CONFIG_X86_64 | 3099 | #ifdef CONFIG_X86_64 |
| 3033 | regs->r8 = vcpu->arch.regs[VCPU_REGS_R8]; | 3100 | regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); |
| 3034 | regs->r9 = vcpu->arch.regs[VCPU_REGS_R9]; | 3101 | regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); |
| 3035 | regs->r10 = vcpu->arch.regs[VCPU_REGS_R10]; | 3102 | regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); |
| 3036 | regs->r11 = vcpu->arch.regs[VCPU_REGS_R11]; | 3103 | regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); |
| 3037 | regs->r12 = vcpu->arch.regs[VCPU_REGS_R12]; | 3104 | regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); |
| 3038 | regs->r13 = vcpu->arch.regs[VCPU_REGS_R13]; | 3105 | regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); |
| 3039 | regs->r14 = vcpu->arch.regs[VCPU_REGS_R14]; | 3106 | regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); |
| 3040 | regs->r15 = vcpu->arch.regs[VCPU_REGS_R15]; | 3107 | regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); |
| 3041 | #endif | 3108 | #endif |
| 3042 | 3109 | ||
| 3043 | regs->rip = vcpu->arch.rip; | 3110 | regs->rip = kvm_rip_read(vcpu); |
| 3044 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); | 3111 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); |
| 3045 | 3112 | ||
| 3046 | /* | 3113 | /* |
| @@ -3058,29 +3125,29 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 3058 | { | 3125 | { |
| 3059 | vcpu_load(vcpu); | 3126 | vcpu_load(vcpu); |
| 3060 | 3127 | ||
| 3061 | vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax; | 3128 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
| 3062 | vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx; | 3129 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
| 3063 | vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx; | 3130 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
| 3064 | vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx; | 3131 | kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); |
| 3065 | vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi; | 3132 | kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); |
| 3066 | vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi; | 3133 | kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); |
| 3067 | vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp; | 3134 | kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); |
| 3068 | vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp; | 3135 | kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); |
| 3069 | #ifdef CONFIG_X86_64 | 3136 | #ifdef CONFIG_X86_64 |
| 3070 | vcpu->arch.regs[VCPU_REGS_R8] = regs->r8; | 3137 | kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); |
| 3071 | vcpu->arch.regs[VCPU_REGS_R9] = regs->r9; | 3138 | kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); |
| 3072 | vcpu->arch.regs[VCPU_REGS_R10] = regs->r10; | 3139 | kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); |
| 3073 | vcpu->arch.regs[VCPU_REGS_R11] = regs->r11; | 3140 | kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); |
| 3074 | vcpu->arch.regs[VCPU_REGS_R12] = regs->r12; | 3141 | kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); |
| 3075 | vcpu->arch.regs[VCPU_REGS_R13] = regs->r13; | 3142 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); |
| 3076 | vcpu->arch.regs[VCPU_REGS_R14] = regs->r14; | 3143 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); |
| 3077 | vcpu->arch.regs[VCPU_REGS_R15] = regs->r15; | 3144 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); |
| 3145 | |||
| 3078 | #endif | 3146 | #endif |
| 3079 | 3147 | ||
| 3080 | vcpu->arch.rip = regs->rip; | 3148 | kvm_rip_write(vcpu, regs->rip); |
| 3081 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); | 3149 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); |
| 3082 | 3150 | ||
| 3083 | kvm_x86_ops->decache_regs(vcpu); | ||
| 3084 | 3151 | ||
| 3085 | vcpu->arch.exception.pending = false; | 3152 | vcpu->arch.exception.pending = false; |
| 3086 | 3153 | ||
| @@ -3294,11 +3361,33 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | |||
| 3294 | return 0; | 3361 | return 0; |
| 3295 | } | 3362 | } |
| 3296 | 3363 | ||
| 3364 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
| 3365 | { | ||
| 3366 | struct kvm_segment segvar = { | ||
| 3367 | .base = selector << 4, | ||
| 3368 | .limit = 0xffff, | ||
| 3369 | .selector = selector, | ||
| 3370 | .type = 3, | ||
| 3371 | .present = 1, | ||
| 3372 | .dpl = 3, | ||
| 3373 | .db = 0, | ||
| 3374 | .s = 1, | ||
| 3375 | .l = 0, | ||
| 3376 | .g = 0, | ||
| 3377 | .avl = 0, | ||
| 3378 | .unusable = 0, | ||
| 3379 | }; | ||
| 3380 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
| 3381 | return 0; | ||
| 3382 | } | ||
| 3383 | |||
| 3297 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3384 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
| 3298 | int type_bits, int seg) | 3385 | int type_bits, int seg) |
| 3299 | { | 3386 | { |
| 3300 | struct kvm_segment kvm_seg; | 3387 | struct kvm_segment kvm_seg; |
| 3301 | 3388 | ||
| 3389 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) | ||
| 3390 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
| 3302 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | 3391 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) |
| 3303 | return 1; | 3392 | return 1; |
| 3304 | kvm_seg.type |= type_bits; | 3393 | kvm_seg.type |= type_bits; |
| @@ -3316,17 +3405,16 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
| 3316 | struct tss_segment_32 *tss) | 3405 | struct tss_segment_32 *tss) |
| 3317 | { | 3406 | { |
| 3318 | tss->cr3 = vcpu->arch.cr3; | 3407 | tss->cr3 = vcpu->arch.cr3; |
| 3319 | tss->eip = vcpu->arch.rip; | 3408 | tss->eip = kvm_rip_read(vcpu); |
| 3320 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | 3409 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); |
| 3321 | tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3410 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3322 | tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3411 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3323 | tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; | 3412 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 3324 | tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; | 3413 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
| 3325 | tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; | 3414 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
| 3326 | tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; | 3415 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); |
| 3327 | tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; | 3416 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 3328 | tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; | 3417 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 3329 | |||
| 3330 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | 3418 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); |
| 3331 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | 3419 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); |
| 3332 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | 3420 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); |
| @@ -3342,17 +3430,17 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
| 3342 | { | 3430 | { |
| 3343 | kvm_set_cr3(vcpu, tss->cr3); | 3431 | kvm_set_cr3(vcpu, tss->cr3); |
| 3344 | 3432 | ||
| 3345 | vcpu->arch.rip = tss->eip; | 3433 | kvm_rip_write(vcpu, tss->eip); |
| 3346 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | 3434 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); |
| 3347 | 3435 | ||
| 3348 | vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax; | 3436 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); |
| 3349 | vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; | 3437 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); |
| 3350 | vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; | 3438 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); |
| 3351 | vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; | 3439 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); |
| 3352 | vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; | 3440 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); |
| 3353 | vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; | 3441 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); |
| 3354 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; | 3442 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); |
| 3355 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; | 3443 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); |
| 3356 | 3444 | ||
| 3357 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 3445 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) |
| 3358 | return 1; | 3446 | return 1; |
| @@ -3380,16 +3468,16 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
| 3380 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | 3468 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, |
| 3381 | struct tss_segment_16 *tss) | 3469 | struct tss_segment_16 *tss) |
| 3382 | { | 3470 | { |
| 3383 | tss->ip = vcpu->arch.rip; | 3471 | tss->ip = kvm_rip_read(vcpu); |
| 3384 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | 3472 | tss->flag = kvm_x86_ops->get_rflags(vcpu); |
| 3385 | tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3473 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3386 | tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3474 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3387 | tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; | 3475 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 3388 | tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; | 3476 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
| 3389 | tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; | 3477 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
| 3390 | tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; | 3478 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); |
| 3391 | tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; | 3479 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 3392 | tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; | 3480 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 3393 | 3481 | ||
| 3394 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | 3482 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); |
| 3395 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | 3483 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); |
| @@ -3402,16 +3490,16 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
| 3402 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | 3490 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, |
| 3403 | struct tss_segment_16 *tss) | 3491 | struct tss_segment_16 *tss) |
| 3404 | { | 3492 | { |
| 3405 | vcpu->arch.rip = tss->ip; | 3493 | kvm_rip_write(vcpu, tss->ip); |
| 3406 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | 3494 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); |
| 3407 | vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; | 3495 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); |
| 3408 | vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; | 3496 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); |
| 3409 | vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; | 3497 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); |
| 3410 | vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; | 3498 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); |
| 3411 | vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; | 3499 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); |
| 3412 | vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; | 3500 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); |
| 3413 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; | 3501 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); |
| 3414 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; | 3502 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); |
| 3415 | 3503 | ||
| 3416 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 3504 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) |
| 3417 | return 1; | 3505 | return 1; |
| @@ -3534,7 +3622,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3534 | } | 3622 | } |
| 3535 | 3623 | ||
| 3536 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 3624 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
| 3537 | kvm_x86_ops->cache_regs(vcpu); | ||
| 3538 | 3625 | ||
| 3539 | if (nseg_desc.type & 8) | 3626 | if (nseg_desc.type & 8) |
| 3540 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, | 3627 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, |
| @@ -3559,7 +3646,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3559 | tr_seg.type = 11; | 3646 | tr_seg.type = 11; |
| 3560 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3647 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
| 3561 | out: | 3648 | out: |
| 3562 | kvm_x86_ops->decache_regs(vcpu); | ||
| 3563 | return ret; | 3649 | return ret; |
| 3564 | } | 3650 | } |
| 3565 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 3651 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
| @@ -3622,6 +3708,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3622 | pr_debug("Set back pending irq %d\n", | 3708 | pr_debug("Set back pending irq %d\n", |
| 3623 | pending_vec); | 3709 | pending_vec); |
| 3624 | } | 3710 | } |
| 3711 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
| 3625 | } | 3712 | } |
| 3626 | 3713 | ||
| 3627 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 3714 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
| @@ -3634,6 +3721,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3634 | kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | 3721 | kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); |
| 3635 | kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 3722 | kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
| 3636 | 3723 | ||
| 3724 | /* Older userspace won't unhalt the vcpu on reset. */ | ||
| 3725 | if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 && | ||
| 3726 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && | ||
| 3727 | !(vcpu->arch.cr0 & X86_CR0_PE)) | ||
| 3728 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 3729 | |||
| 3637 | vcpu_put(vcpu); | 3730 | vcpu_put(vcpu); |
| 3638 | 3731 | ||
| 3639 | return 0; | 3732 | return 0; |
| @@ -3918,6 +4011,7 @@ struct kvm *kvm_arch_create_vm(void) | |||
| 3918 | return ERR_PTR(-ENOMEM); | 4011 | return ERR_PTR(-ENOMEM); |
| 3919 | 4012 | ||
| 3920 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 4013 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
| 4014 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | ||
| 3921 | 4015 | ||
| 3922 | return kvm; | 4016 | return kvm; |
| 3923 | } | 4017 | } |
| @@ -3950,6 +4044,8 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
| 3950 | 4044 | ||
| 3951 | void kvm_arch_destroy_vm(struct kvm *kvm) | 4045 | void kvm_arch_destroy_vm(struct kvm *kvm) |
| 3952 | { | 4046 | { |
| 4047 | kvm_iommu_unmap_guest(kvm); | ||
| 4048 | kvm_free_all_assigned_devices(kvm); | ||
| 3953 | kvm_free_pit(kvm); | 4049 | kvm_free_pit(kvm); |
| 3954 | kfree(kvm->arch.vpic); | 4050 | kfree(kvm->arch.vpic); |
| 3955 | kfree(kvm->arch.vioapic); | 4051 | kfree(kvm->arch.vioapic); |
| @@ -3981,7 +4077,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 3981 | userspace_addr = do_mmap(NULL, 0, | 4077 | userspace_addr = do_mmap(NULL, 0, |
| 3982 | npages * PAGE_SIZE, | 4078 | npages * PAGE_SIZE, |
| 3983 | PROT_READ | PROT_WRITE, | 4079 | PROT_READ | PROT_WRITE, |
| 3984 | MAP_SHARED | MAP_ANONYMOUS, | 4080 | MAP_PRIVATE | MAP_ANONYMOUS, |
| 3985 | 0); | 4081 | 0); |
| 3986 | up_write(&current->mm->mmap_sem); | 4082 | up_write(&current->mm->mmap_sem); |
| 3987 | 4083 | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h new file mode 100644 index 000000000000..6a4be78a7384 --- /dev/null +++ b/arch/x86/kvm/x86.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | #ifndef ARCH_X86_KVM_X86_H | ||
| 2 | #define ARCH_X86_KVM_X86_H | ||
| 3 | |||
| 4 | #include <linux/kvm_host.h> | ||
| 5 | |||
| 6 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | ||
| 7 | { | ||
| 8 | vcpu->arch.exception.pending = false; | ||
| 9 | } | ||
| 10 | |||
| 11 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) | ||
| 12 | { | ||
| 13 | vcpu->arch.interrupt.pending = true; | ||
| 14 | vcpu->arch.interrupt.nr = vector; | ||
| 15 | } | ||
| 16 | |||
| 17 | static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | ||
| 18 | { | ||
| 19 | vcpu->arch.interrupt.pending = false; | ||
| 20 | } | ||
| 21 | |||
| 22 | #endif | ||
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index f2f90468f8b1..ea051173b0da 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #define DPRINTF(_f, _a ...) printf(_f , ## _a) | 26 | #define DPRINTF(_f, _a ...) printf(_f , ## _a) |
| 27 | #else | 27 | #else |
| 28 | #include <linux/kvm_host.h> | 28 | #include <linux/kvm_host.h> |
| 29 | #include "kvm_cache_regs.h" | ||
| 29 | #define DPRINTF(x...) do {} while (0) | 30 | #define DPRINTF(x...) do {} while (0) |
| 30 | #endif | 31 | #endif |
| 31 | #include <linux/module.h> | 32 | #include <linux/module.h> |
| @@ -46,25 +47,26 @@ | |||
| 46 | #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ | 47 | #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ |
| 47 | #define DstReg (2<<1) /* Register operand. */ | 48 | #define DstReg (2<<1) /* Register operand. */ |
| 48 | #define DstMem (3<<1) /* Memory operand. */ | 49 | #define DstMem (3<<1) /* Memory operand. */ |
| 49 | #define DstMask (3<<1) | 50 | #define DstAcc (4<<1) /* Destination Accumulator */ |
| 51 | #define DstMask (7<<1) | ||
| 50 | /* Source operand type. */ | 52 | /* Source operand type. */ |
| 51 | #define SrcNone (0<<3) /* No source operand. */ | 53 | #define SrcNone (0<<4) /* No source operand. */ |
| 52 | #define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ | 54 | #define SrcImplicit (0<<4) /* Source operand is implicit in the opcode. */ |
| 53 | #define SrcReg (1<<3) /* Register operand. */ | 55 | #define SrcReg (1<<4) /* Register operand. */ |
| 54 | #define SrcMem (2<<3) /* Memory operand. */ | 56 | #define SrcMem (2<<4) /* Memory operand. */ |
| 55 | #define SrcMem16 (3<<3) /* Memory operand (16-bit). */ | 57 | #define SrcMem16 (3<<4) /* Memory operand (16-bit). */ |
| 56 | #define SrcMem32 (4<<3) /* Memory operand (32-bit). */ | 58 | #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ |
| 57 | #define SrcImm (5<<3) /* Immediate operand. */ | 59 | #define SrcImm (5<<4) /* Immediate operand. */ |
| 58 | #define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ | 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ |
| 59 | #define SrcMask (7<<3) | 61 | #define SrcMask (7<<4) |
| 60 | /* Generic ModRM decode. */ | 62 | /* Generic ModRM decode. */ |
| 61 | #define ModRM (1<<6) | 63 | #define ModRM (1<<7) |
| 62 | /* Destination is only written; never read. */ | 64 | /* Destination is only written; never read. */ |
| 63 | #define Mov (1<<7) | 65 | #define Mov (1<<8) |
| 64 | #define BitOp (1<<8) | 66 | #define BitOp (1<<9) |
| 65 | #define MemAbs (1<<9) /* Memory operand is absolute displacement */ | 67 | #define MemAbs (1<<10) /* Memory operand is absolute displacement */ |
| 66 | #define String (1<<10) /* String instruction (rep capable) */ | 68 | #define String (1<<12) /* String instruction (rep capable) */ |
| 67 | #define Stack (1<<11) /* Stack instruction (push/pop) */ | 69 | #define Stack (1<<13) /* Stack instruction (push/pop) */ |
| 68 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 70 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
| 69 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 71 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
| 70 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 72 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
| @@ -94,7 +96,7 @@ static u16 opcode_table[256] = { | |||
| 94 | /* 0x20 - 0x27 */ | 96 | /* 0x20 - 0x27 */ |
| 95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 97 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 98 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 97 | SrcImmByte, SrcImm, 0, 0, | 99 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
| 98 | /* 0x28 - 0x2F */ | 100 | /* 0x28 - 0x2F */ |
| 99 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 101 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 100 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 102 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| @@ -106,7 +108,8 @@ static u16 opcode_table[256] = { | |||
| 106 | /* 0x38 - 0x3F */ | 108 | /* 0x38 - 0x3F */ |
| 107 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 109 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 108 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 110 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 109 | 0, 0, 0, 0, | 111 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
| 112 | 0, 0, | ||
| 110 | /* 0x40 - 0x47 */ | 113 | /* 0x40 - 0x47 */ |
| 111 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 114 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
| 112 | /* 0x48 - 0x4F */ | 115 | /* 0x48 - 0x4F */ |
| @@ -153,9 +156,16 @@ static u16 opcode_table[256] = { | |||
| 153 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 156 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, |
| 154 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 157 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, |
| 155 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 158 | ByteOp | ImplicitOps | String, ImplicitOps | String, |
| 156 | /* 0xB0 - 0xBF */ | 159 | /* 0xB0 - 0xB7 */ |
| 157 | 0, 0, 0, 0, 0, 0, 0, 0, | 160 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| 158 | DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0, | 161 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| 162 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | ||
| 163 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | ||
| 164 | /* 0xB8 - 0xBF */ | ||
| 165 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 166 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 167 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 168 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 159 | /* 0xC0 - 0xC7 */ | 169 | /* 0xC0 - 0xC7 */ |
| 160 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, | 170 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, |
| 161 | 0, ImplicitOps | Stack, 0, 0, | 171 | 0, ImplicitOps | Stack, 0, 0, |
| @@ -169,17 +179,20 @@ static u16 opcode_table[256] = { | |||
| 169 | /* 0xD8 - 0xDF */ | 179 | /* 0xD8 - 0xDF */ |
| 170 | 0, 0, 0, 0, 0, 0, 0, 0, | 180 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 171 | /* 0xE0 - 0xE7 */ | 181 | /* 0xE0 - 0xE7 */ |
| 172 | 0, 0, 0, 0, 0, 0, 0, 0, | 182 | 0, 0, 0, 0, |
| 183 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | ||
| 184 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | ||
| 173 | /* 0xE8 - 0xEF */ | 185 | /* 0xE8 - 0xEF */ |
| 174 | ImplicitOps | Stack, SrcImm | ImplicitOps, | 186 | ImplicitOps | Stack, SrcImm | ImplicitOps, |
| 175 | ImplicitOps, SrcImmByte | ImplicitOps, | 187 | ImplicitOps, SrcImmByte | ImplicitOps, |
| 176 | 0, 0, 0, 0, | 188 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
| 189 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | ||
| 177 | /* 0xF0 - 0xF7 */ | 190 | /* 0xF0 - 0xF7 */ |
| 178 | 0, 0, 0, 0, | 191 | 0, 0, 0, 0, |
| 179 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, | 192 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, |
| 180 | /* 0xF8 - 0xFF */ | 193 | /* 0xF8 - 0xFF */ |
| 181 | ImplicitOps, 0, ImplicitOps, ImplicitOps, | 194 | ImplicitOps, 0, ImplicitOps, ImplicitOps, |
| 182 | 0, 0, Group | Group4, Group | Group5, | 195 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
| 183 | }; | 196 | }; |
| 184 | 197 | ||
| 185 | static u16 twobyte_table[256] = { | 198 | static u16 twobyte_table[256] = { |
| @@ -268,15 +281,16 @@ static u16 group_table[] = { | |||
| 268 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 281 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
| 269 | 0, 0, 0, 0, | 282 | 0, 0, 0, 0, |
| 270 | [Group3*8] = | 283 | [Group3*8] = |
| 271 | DstMem | SrcImm | ModRM | SrcImm, 0, | 284 | DstMem | SrcImm | ModRM, 0, |
| 272 | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 285 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 273 | 0, 0, 0, 0, | 286 | 0, 0, 0, 0, |
| 274 | [Group4*8] = | 287 | [Group4*8] = |
| 275 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 288 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
| 276 | 0, 0, 0, 0, 0, 0, | 289 | 0, 0, 0, 0, 0, 0, |
| 277 | [Group5*8] = | 290 | [Group5*8] = |
| 278 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0, | 291 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 279 | SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0, | 292 | SrcMem | ModRM | Stack, 0, |
| 293 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | ||
| 280 | [Group7*8] = | 294 | [Group7*8] = |
| 281 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, | 295 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, |
| 282 | SrcNone | ModRM | DstMem | Mov, 0, | 296 | SrcNone | ModRM | DstMem | Mov, 0, |
| @@ -839,7 +853,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 839 | /* Shadow copy of register state. Committed on successful emulation. */ | 853 | /* Shadow copy of register state. Committed on successful emulation. */ |
| 840 | 854 | ||
| 841 | memset(c, 0, sizeof(struct decode_cache)); | 855 | memset(c, 0, sizeof(struct decode_cache)); |
| 842 | c->eip = ctxt->vcpu->arch.rip; | 856 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 843 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 857 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
| 844 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 858 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 845 | 859 | ||
| @@ -1048,6 +1062,23 @@ done_prefixes: | |||
| 1048 | } | 1062 | } |
| 1049 | c->dst.type = OP_MEM; | 1063 | c->dst.type = OP_MEM; |
| 1050 | break; | 1064 | break; |
| 1065 | case DstAcc: | ||
| 1066 | c->dst.type = OP_REG; | ||
| 1067 | c->dst.bytes = c->op_bytes; | ||
| 1068 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1069 | switch (c->op_bytes) { | ||
| 1070 | case 1: | ||
| 1071 | c->dst.val = *(u8 *)c->dst.ptr; | ||
| 1072 | break; | ||
| 1073 | case 2: | ||
| 1074 | c->dst.val = *(u16 *)c->dst.ptr; | ||
| 1075 | break; | ||
| 1076 | case 4: | ||
| 1077 | c->dst.val = *(u32 *)c->dst.ptr; | ||
| 1078 | break; | ||
| 1079 | } | ||
| 1080 | c->dst.orig_val = c->dst.val; | ||
| 1081 | break; | ||
| 1051 | } | 1082 | } |
| 1052 | 1083 | ||
| 1053 | if (c->rip_relative) | 1084 | if (c->rip_relative) |
| @@ -1151,6 +1182,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
| 1151 | case 1: /* dec */ | 1182 | case 1: /* dec */ |
| 1152 | emulate_1op("dec", c->dst, ctxt->eflags); | 1183 | emulate_1op("dec", c->dst, ctxt->eflags); |
| 1153 | break; | 1184 | break; |
| 1185 | case 2: /* call near abs */ { | ||
| 1186 | long int old_eip; | ||
| 1187 | old_eip = c->eip; | ||
| 1188 | c->eip = c->src.val; | ||
| 1189 | c->src.val = old_eip; | ||
| 1190 | emulate_push(ctxt); | ||
| 1191 | break; | ||
| 1192 | } | ||
| 1154 | case 4: /* jmp abs */ | 1193 | case 4: /* jmp abs */ |
| 1155 | c->eip = c->src.val; | 1194 | c->eip = c->src.val; |
| 1156 | break; | 1195 | break; |
| @@ -1251,6 +1290,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1251 | u64 msr_data; | 1290 | u64 msr_data; |
| 1252 | unsigned long saved_eip = 0; | 1291 | unsigned long saved_eip = 0; |
| 1253 | struct decode_cache *c = &ctxt->decode; | 1292 | struct decode_cache *c = &ctxt->decode; |
| 1293 | unsigned int port; | ||
| 1294 | int io_dir_in; | ||
| 1254 | int rc = 0; | 1295 | int rc = 0; |
| 1255 | 1296 | ||
| 1256 | /* Shadow copy of register state. Committed on successful emulation. | 1297 | /* Shadow copy of register state. Committed on successful emulation. |
| @@ -1267,7 +1308,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1267 | if (c->rep_prefix && (c->d & String)) { | 1308 | if (c->rep_prefix && (c->d & String)) { |
| 1268 | /* All REP prefixes have the same first termination condition */ | 1309 | /* All REP prefixes have the same first termination condition */ |
| 1269 | if (c->regs[VCPU_REGS_RCX] == 0) { | 1310 | if (c->regs[VCPU_REGS_RCX] == 0) { |
| 1270 | ctxt->vcpu->arch.rip = c->eip; | 1311 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1271 | goto done; | 1312 | goto done; |
| 1272 | } | 1313 | } |
| 1273 | /* The second termination condition only applies for REPE | 1314 | /* The second termination condition only applies for REPE |
| @@ -1281,17 +1322,17 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1281 | (c->b == 0xae) || (c->b == 0xaf)) { | 1322 | (c->b == 0xae) || (c->b == 0xaf)) { |
| 1282 | if ((c->rep_prefix == REPE_PREFIX) && | 1323 | if ((c->rep_prefix == REPE_PREFIX) && |
| 1283 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 1324 | ((ctxt->eflags & EFLG_ZF) == 0)) { |
| 1284 | ctxt->vcpu->arch.rip = c->eip; | 1325 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1285 | goto done; | 1326 | goto done; |
| 1286 | } | 1327 | } |
| 1287 | if ((c->rep_prefix == REPNE_PREFIX) && | 1328 | if ((c->rep_prefix == REPNE_PREFIX) && |
| 1288 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 1329 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { |
| 1289 | ctxt->vcpu->arch.rip = c->eip; | 1330 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1290 | goto done; | 1331 | goto done; |
| 1291 | } | 1332 | } |
| 1292 | } | 1333 | } |
| 1293 | c->regs[VCPU_REGS_RCX]--; | 1334 | c->regs[VCPU_REGS_RCX]--; |
| 1294 | c->eip = ctxt->vcpu->arch.rip; | 1335 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1295 | } | 1336 | } |
| 1296 | 1337 | ||
| 1297 | if (c->src.type == OP_MEM) { | 1338 | if (c->src.type == OP_MEM) { |
| @@ -1351,27 +1392,10 @@ special_insn: | |||
| 1351 | sbb: /* sbb */ | 1392 | sbb: /* sbb */ |
| 1352 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 1393 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
| 1353 | break; | 1394 | break; |
| 1354 | case 0x20 ... 0x23: | 1395 | case 0x20 ... 0x25: |
| 1355 | and: /* and */ | 1396 | and: /* and */ |
| 1356 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | 1397 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); |
| 1357 | break; | 1398 | break; |
| 1358 | case 0x24: /* and al imm8 */ | ||
| 1359 | c->dst.type = OP_REG; | ||
| 1360 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1361 | c->dst.val = *(u8 *)c->dst.ptr; | ||
| 1362 | c->dst.bytes = 1; | ||
| 1363 | c->dst.orig_val = c->dst.val; | ||
| 1364 | goto and; | ||
| 1365 | case 0x25: /* and ax imm16, or eax imm32 */ | ||
| 1366 | c->dst.type = OP_REG; | ||
| 1367 | c->dst.bytes = c->op_bytes; | ||
| 1368 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1369 | if (c->op_bytes == 2) | ||
| 1370 | c->dst.val = *(u16 *)c->dst.ptr; | ||
| 1371 | else | ||
| 1372 | c->dst.val = *(u32 *)c->dst.ptr; | ||
| 1373 | c->dst.orig_val = c->dst.val; | ||
| 1374 | goto and; | ||
| 1375 | case 0x28 ... 0x2d: | 1399 | case 0x28 ... 0x2d: |
| 1376 | sub: /* sub */ | 1400 | sub: /* sub */ |
| 1377 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | 1401 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); |
| @@ -1659,7 +1683,7 @@ special_insn: | |||
| 1659 | case 0xae ... 0xaf: /* scas */ | 1683 | case 0xae ... 0xaf: /* scas */ |
| 1660 | DPRINTF("Urk! I don't handle SCAS.\n"); | 1684 | DPRINTF("Urk! I don't handle SCAS.\n"); |
| 1661 | goto cannot_emulate; | 1685 | goto cannot_emulate; |
| 1662 | case 0xb8: /* mov r, imm */ | 1686 | case 0xb0 ... 0xbf: /* mov r, imm */ |
| 1663 | goto mov; | 1687 | goto mov; |
| 1664 | case 0xc0 ... 0xc1: | 1688 | case 0xc0 ... 0xc1: |
| 1665 | emulate_grp2(ctxt); | 1689 | emulate_grp2(ctxt); |
| @@ -1679,6 +1703,16 @@ special_insn: | |||
| 1679 | c->src.val = c->regs[VCPU_REGS_RCX]; | 1703 | c->src.val = c->regs[VCPU_REGS_RCX]; |
| 1680 | emulate_grp2(ctxt); | 1704 | emulate_grp2(ctxt); |
| 1681 | break; | 1705 | break; |
| 1706 | case 0xe4: /* inb */ | ||
| 1707 | case 0xe5: /* in */ | ||
| 1708 | port = insn_fetch(u8, 1, c->eip); | ||
| 1709 | io_dir_in = 1; | ||
| 1710 | goto do_io; | ||
| 1711 | case 0xe6: /* outb */ | ||
| 1712 | case 0xe7: /* out */ | ||
| 1713 | port = insn_fetch(u8, 1, c->eip); | ||
| 1714 | io_dir_in = 0; | ||
| 1715 | goto do_io; | ||
| 1682 | case 0xe8: /* call (near) */ { | 1716 | case 0xe8: /* call (near) */ { |
| 1683 | long int rel; | 1717 | long int rel; |
| 1684 | switch (c->op_bytes) { | 1718 | switch (c->op_bytes) { |
| @@ -1729,6 +1763,22 @@ special_insn: | |||
| 1729 | jmp_rel(c, c->src.val); | 1763 | jmp_rel(c, c->src.val); |
| 1730 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1764 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 1731 | break; | 1765 | break; |
| 1766 | case 0xec: /* in al,dx */ | ||
| 1767 | case 0xed: /* in (e/r)ax,dx */ | ||
| 1768 | port = c->regs[VCPU_REGS_RDX]; | ||
| 1769 | io_dir_in = 1; | ||
| 1770 | goto do_io; | ||
| 1771 | case 0xee: /* out al,dx */ | ||
| 1772 | case 0xef: /* out (e/r)ax,dx */ | ||
| 1773 | port = c->regs[VCPU_REGS_RDX]; | ||
| 1774 | io_dir_in = 0; | ||
| 1775 | do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, | ||
| 1776 | (c->d & ByteOp) ? 1 : c->op_bytes, | ||
| 1777 | port) != 0) { | ||
| 1778 | c->eip = saved_eip; | ||
| 1779 | goto cannot_emulate; | ||
| 1780 | } | ||
| 1781 | return 0; | ||
| 1732 | case 0xf4: /* hlt */ | 1782 | case 0xf4: /* hlt */ |
| 1733 | ctxt->vcpu->arch.halt_request = 1; | 1783 | ctxt->vcpu->arch.halt_request = 1; |
| 1734 | break; | 1784 | break; |
| @@ -1754,6 +1804,14 @@ special_insn: | |||
| 1754 | ctxt->eflags |= X86_EFLAGS_IF; | 1804 | ctxt->eflags |= X86_EFLAGS_IF; |
| 1755 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1805 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 1756 | break; | 1806 | break; |
| 1807 | case 0xfc: /* cld */ | ||
| 1808 | ctxt->eflags &= ~EFLG_DF; | ||
| 1809 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 1810 | break; | ||
| 1811 | case 0xfd: /* std */ | ||
| 1812 | ctxt->eflags |= EFLG_DF; | ||
| 1813 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 1814 | break; | ||
| 1757 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 1815 | case 0xfe ... 0xff: /* Grp4/Grp5 */ |
| 1758 | rc = emulate_grp45(ctxt, ops); | 1816 | rc = emulate_grp45(ctxt, ops); |
| 1759 | if (rc != 0) | 1817 | if (rc != 0) |
| @@ -1768,7 +1826,7 @@ writeback: | |||
| 1768 | 1826 | ||
| 1769 | /* Commit shadow register state. */ | 1827 | /* Commit shadow register state. */ |
| 1770 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 1828 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
| 1771 | ctxt->vcpu->arch.rip = c->eip; | 1829 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1772 | 1830 | ||
| 1773 | done: | 1831 | done: |
| 1774 | if (rc == X86EMUL_UNHANDLEABLE) { | 1832 | if (rc == X86EMUL_UNHANDLEABLE) { |
| @@ -1793,7 +1851,7 @@ twobyte_insn: | |||
| 1793 | goto done; | 1851 | goto done; |
| 1794 | 1852 | ||
| 1795 | /* Let the processor re-execute the fixed hypercall */ | 1853 | /* Let the processor re-execute the fixed hypercall */ |
| 1796 | c->eip = ctxt->vcpu->arch.rip; | 1854 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1797 | /* Disable writeback. */ | 1855 | /* Disable writeback. */ |
| 1798 | c->dst.type = OP_NONE; | 1856 | c->dst.type = OP_NONE; |
| 1799 | break; | 1857 | break; |
| @@ -1889,7 +1947,7 @@ twobyte_insn: | |||
| 1889 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 1947 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); |
| 1890 | if (rc) { | 1948 | if (rc) { |
| 1891 | kvm_inject_gp(ctxt->vcpu, 0); | 1949 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1892 | c->eip = ctxt->vcpu->arch.rip; | 1950 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1893 | } | 1951 | } |
| 1894 | rc = X86EMUL_CONTINUE; | 1952 | rc = X86EMUL_CONTINUE; |
| 1895 | c->dst.type = OP_NONE; | 1953 | c->dst.type = OP_NONE; |
| @@ -1899,7 +1957,7 @@ twobyte_insn: | |||
| 1899 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 1957 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); |
| 1900 | if (rc) { | 1958 | if (rc) { |
| 1901 | kvm_inject_gp(ctxt->vcpu, 0); | 1959 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1902 | c->eip = ctxt->vcpu->arch.rip; | 1960 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1903 | } else { | 1961 | } else { |
| 1904 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 1962 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
| 1905 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 1963 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
