diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2011-02-22 12:24:26 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2011-02-22 12:41:48 -0500 |
commit | 695884fb8acd9857e0e7120ccb2150e30f4b8fef (patch) | |
tree | 49aa424c1a021ce432e9fa5ea29d37a23e4e30cc /arch/x86/kvm | |
parent | 5df91509d324d44cfb11e55d9cb02fe18b53b045 (diff) | |
parent | 04bea68b2f0eeebb089ecc67b618795925268b4a (diff) |
Merge branch 'devicetree/for-x86' of git://git.secretlab.ca/git/linux-2.6 into x86/platform
Reason: x86 devicetree support for ce4100 depends on those device tree
changes scheduled for .39.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 367 | ||||
-rw-r--r-- | arch/x86/kvm/i8259.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_cache_regs.h | 22 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 379 | ||||
-rw-r--r-- | arch/x86/kvm/mmu_audit.c | 39 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 156 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 861 | ||||
-rw-r--r-- | arch/x86/kvm/trace.h | 17 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 180 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 487 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 5 |
14 files changed, 1627 insertions, 895 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ddc131ff438f..50f63648ce1b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -28,6 +28,7 @@ config KVM | |||
28 | select HAVE_KVM_IRQCHIP | 28 | select HAVE_KVM_IRQCHIP |
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select KVM_ASYNC_PF | ||
31 | select USER_RETURN_NOTIFIER | 32 | select USER_RETURN_NOTIFIER |
32 | select KVM_MMIO | 33 | select KVM_MMIO |
33 | ---help--- | 34 | ---help--- |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 31a7035c4bd9..f15501f431c8 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 2 | ccflags-y += -Ivirt/kvm -Iarch/x86/kvm |
3 | 3 | ||
4 | CFLAGS_x86.o := -I. | 4 | CFLAGS_x86.o := -I. |
5 | CFLAGS_svm.o := -I. | 5 | CFLAGS_svm.o := -I. |
@@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | |||
9 | coalesced_mmio.o irq_comm.o eventfd.o \ | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
10 | assigned-dev.o) | 10 | assigned-dev.o) |
11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) |
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | ||
12 | 13 | ||
13 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
14 | i8254.o timer.o | 15 | i8254.o timer.o |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 38b6e8dafaff..caf966781d25 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -20,16 +20,8 @@ | |||
20 | * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 | 20 | * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #ifndef __KERNEL__ | ||
24 | #include <stdio.h> | ||
25 | #include <stdint.h> | ||
26 | #include <public/xen.h> | ||
27 | #define DPRINTF(_f, _a ...) printf(_f , ## _a) | ||
28 | #else | ||
29 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
30 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
31 | #define DPRINTF(x...) do {} while (0) | ||
32 | #endif | ||
33 | #include <linux/module.h> | 25 | #include <linux/module.h> |
34 | #include <asm/kvm_emulate.h> | 26 | #include <asm/kvm_emulate.h> |
35 | 27 | ||
@@ -418,9 +410,9 @@ address_mask(struct decode_cache *c, unsigned long reg) | |||
418 | } | 410 | } |
419 | 411 | ||
420 | static inline unsigned long | 412 | static inline unsigned long |
421 | register_address(struct decode_cache *c, unsigned long base, unsigned long reg) | 413 | register_address(struct decode_cache *c, unsigned long reg) |
422 | { | 414 | { |
423 | return base + address_mask(c, reg); | 415 | return address_mask(c, reg); |
424 | } | 416 | } |
425 | 417 | ||
426 | static inline void | 418 | static inline void |
@@ -452,60 +444,55 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, | |||
452 | return ops->get_cached_segment_base(seg, ctxt->vcpu); | 444 | return ops->get_cached_segment_base(seg, ctxt->vcpu); |
453 | } | 445 | } |
454 | 446 | ||
455 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, | 447 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, |
456 | struct x86_emulate_ops *ops, | 448 | struct x86_emulate_ops *ops, |
457 | struct decode_cache *c) | 449 | struct decode_cache *c) |
458 | { | 450 | { |
459 | if (!c->has_seg_override) | 451 | if (!c->has_seg_override) |
460 | return 0; | 452 | return 0; |
461 | 453 | ||
462 | return seg_base(ctxt, ops, c->seg_override); | 454 | return c->seg_override; |
463 | } | 455 | } |
464 | 456 | ||
465 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt, | 457 | static ulong linear(struct x86_emulate_ctxt *ctxt, |
466 | struct x86_emulate_ops *ops) | 458 | struct segmented_address addr) |
467 | { | 459 | { |
468 | return seg_base(ctxt, ops, VCPU_SREG_ES); | 460 | struct decode_cache *c = &ctxt->decode; |
469 | } | 461 | ulong la; |
470 | |||
471 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, | ||
472 | struct x86_emulate_ops *ops) | ||
473 | { | ||
474 | return seg_base(ctxt, ops, VCPU_SREG_SS); | ||
475 | } | ||
476 | 462 | ||
477 | static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | 463 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; |
478 | u32 error, bool valid) | 464 | if (c->ad_bytes != 8) |
479 | { | 465 | la &= (u32)-1; |
480 | ctxt->exception = vec; | 466 | return la; |
481 | ctxt->error_code = error; | ||
482 | ctxt->error_code_valid = valid; | ||
483 | } | 467 | } |
484 | 468 | ||
485 | static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | 469 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, |
470 | u32 error, bool valid) | ||
486 | { | 471 | { |
487 | emulate_exception(ctxt, GP_VECTOR, err, true); | 472 | ctxt->exception.vector = vec; |
473 | ctxt->exception.error_code = error; | ||
474 | ctxt->exception.error_code_valid = valid; | ||
475 | return X86EMUL_PROPAGATE_FAULT; | ||
488 | } | 476 | } |
489 | 477 | ||
490 | static void emulate_pf(struct x86_emulate_ctxt *ctxt) | 478 | static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) |
491 | { | 479 | { |
492 | emulate_exception(ctxt, PF_VECTOR, 0, true); | 480 | return emulate_exception(ctxt, GP_VECTOR, err, true); |
493 | } | 481 | } |
494 | 482 | ||
495 | static void emulate_ud(struct x86_emulate_ctxt *ctxt) | 483 | static int emulate_ud(struct x86_emulate_ctxt *ctxt) |
496 | { | 484 | { |
497 | emulate_exception(ctxt, UD_VECTOR, 0, false); | 485 | return emulate_exception(ctxt, UD_VECTOR, 0, false); |
498 | } | 486 | } |
499 | 487 | ||
500 | static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) | 488 | static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err) |
501 | { | 489 | { |
502 | emulate_exception(ctxt, TS_VECTOR, err, true); | 490 | return emulate_exception(ctxt, TS_VECTOR, err, true); |
503 | } | 491 | } |
504 | 492 | ||
505 | static int emulate_de(struct x86_emulate_ctxt *ctxt) | 493 | static int emulate_de(struct x86_emulate_ctxt *ctxt) |
506 | { | 494 | { |
507 | emulate_exception(ctxt, DE_VECTOR, 0, false); | 495 | return emulate_exception(ctxt, DE_VECTOR, 0, false); |
508 | return X86EMUL_PROPAGATE_FAULT; | ||
509 | } | 496 | } |
510 | 497 | ||
511 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 498 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
@@ -520,7 +507,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
520 | cur_size = fc->end - fc->start; | 507 | cur_size = fc->end - fc->start; |
521 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); | 508 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
522 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, | 509 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
523 | size, ctxt->vcpu, NULL); | 510 | size, ctxt->vcpu, &ctxt->exception); |
524 | if (rc != X86EMUL_CONTINUE) | 511 | if (rc != X86EMUL_CONTINUE) |
525 | return rc; | 512 | return rc; |
526 | fc->end += size; | 513 | fc->end += size; |
@@ -564,7 +551,7 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, | |||
564 | 551 | ||
565 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, | 552 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, |
566 | struct x86_emulate_ops *ops, | 553 | struct x86_emulate_ops *ops, |
567 | ulong addr, | 554 | struct segmented_address addr, |
568 | u16 *size, unsigned long *address, int op_bytes) | 555 | u16 *size, unsigned long *address, int op_bytes) |
569 | { | 556 | { |
570 | int rc; | 557 | int rc; |
@@ -572,10 +559,13 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
572 | if (op_bytes == 2) | 559 | if (op_bytes == 2) |
573 | op_bytes = 3; | 560 | op_bytes = 3; |
574 | *address = 0; | 561 | *address = 0; |
575 | rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL); | 562 | rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, |
563 | ctxt->vcpu, &ctxt->exception); | ||
576 | if (rc != X86EMUL_CONTINUE) | 564 | if (rc != X86EMUL_CONTINUE) |
577 | return rc; | 565 | return rc; |
578 | rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL); | 566 | addr.ea += 2; |
567 | rc = ops->read_std(linear(ctxt, addr), address, op_bytes, | ||
568 | ctxt->vcpu, &ctxt->exception); | ||
579 | return rc; | 569 | return rc; |
580 | } | 570 | } |
581 | 571 | ||
@@ -768,7 +758,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
768 | break; | 758 | break; |
769 | } | 759 | } |
770 | } | 760 | } |
771 | op->addr.mem = modrm_ea; | 761 | op->addr.mem.ea = modrm_ea; |
772 | done: | 762 | done: |
773 | return rc; | 763 | return rc; |
774 | } | 764 | } |
@@ -783,13 +773,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
783 | op->type = OP_MEM; | 773 | op->type = OP_MEM; |
784 | switch (c->ad_bytes) { | 774 | switch (c->ad_bytes) { |
785 | case 2: | 775 | case 2: |
786 | op->addr.mem = insn_fetch(u16, 2, c->eip); | 776 | op->addr.mem.ea = insn_fetch(u16, 2, c->eip); |
787 | break; | 777 | break; |
788 | case 4: | 778 | case 4: |
789 | op->addr.mem = insn_fetch(u32, 4, c->eip); | 779 | op->addr.mem.ea = insn_fetch(u32, 4, c->eip); |
790 | break; | 780 | break; |
791 | case 8: | 781 | case 8: |
792 | op->addr.mem = insn_fetch(u64, 8, c->eip); | 782 | op->addr.mem.ea = insn_fetch(u64, 8, c->eip); |
793 | break; | 783 | break; |
794 | } | 784 | } |
795 | done: | 785 | done: |
@@ -808,7 +798,7 @@ static void fetch_bit_operand(struct decode_cache *c) | |||
808 | else if (c->src.bytes == 4) | 798 | else if (c->src.bytes == 4) |
809 | sv = (s32)c->src.val & (s32)mask; | 799 | sv = (s32)c->src.val & (s32)mask; |
810 | 800 | ||
811 | c->dst.addr.mem += (sv >> 3); | 801 | c->dst.addr.mem.ea += (sv >> 3); |
812 | } | 802 | } |
813 | 803 | ||
814 | /* only subword offset */ | 804 | /* only subword offset */ |
@@ -821,7 +811,6 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
821 | { | 811 | { |
822 | int rc; | 812 | int rc; |
823 | struct read_cache *mc = &ctxt->decode.mem_read; | 813 | struct read_cache *mc = &ctxt->decode.mem_read; |
824 | u32 err; | ||
825 | 814 | ||
826 | while (size) { | 815 | while (size) { |
827 | int n = min(size, 8u); | 816 | int n = min(size, 8u); |
@@ -829,10 +818,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
829 | if (mc->pos < mc->end) | 818 | if (mc->pos < mc->end) |
830 | goto read_cached; | 819 | goto read_cached; |
831 | 820 | ||
832 | rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, | 821 | rc = ops->read_emulated(addr, mc->data + mc->end, n, |
833 | ctxt->vcpu); | 822 | &ctxt->exception, ctxt->vcpu); |
834 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
835 | emulate_pf(ctxt); | ||
836 | if (rc != X86EMUL_CONTINUE) | 823 | if (rc != X86EMUL_CONTINUE) |
837 | return rc; | 824 | return rc; |
838 | mc->end += n; | 825 | mc->end += n; |
@@ -907,19 +894,15 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
907 | struct desc_ptr dt; | 894 | struct desc_ptr dt; |
908 | u16 index = selector >> 3; | 895 | u16 index = selector >> 3; |
909 | int ret; | 896 | int ret; |
910 | u32 err; | ||
911 | ulong addr; | 897 | ulong addr; |
912 | 898 | ||
913 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 899 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
914 | 900 | ||
915 | if (dt.size < index * 8 + 7) { | 901 | if (dt.size < index * 8 + 7) |
916 | emulate_gp(ctxt, selector & 0xfffc); | 902 | return emulate_gp(ctxt, selector & 0xfffc); |
917 | return X86EMUL_PROPAGATE_FAULT; | ||
918 | } | ||
919 | addr = dt.address + index * 8; | 903 | addr = dt.address + index * 8; |
920 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 904 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, |
921 | if (ret == X86EMUL_PROPAGATE_FAULT) | 905 | &ctxt->exception); |
922 | emulate_pf(ctxt); | ||
923 | 906 | ||
924 | return ret; | 907 | return ret; |
925 | } | 908 | } |
@@ -931,21 +914,17 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
931 | { | 914 | { |
932 | struct desc_ptr dt; | 915 | struct desc_ptr dt; |
933 | u16 index = selector >> 3; | 916 | u16 index = selector >> 3; |
934 | u32 err; | ||
935 | ulong addr; | 917 | ulong addr; |
936 | int ret; | 918 | int ret; |
937 | 919 | ||
938 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 920 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
939 | 921 | ||
940 | if (dt.size < index * 8 + 7) { | 922 | if (dt.size < index * 8 + 7) |
941 | emulate_gp(ctxt, selector & 0xfffc); | 923 | return emulate_gp(ctxt, selector & 0xfffc); |
942 | return X86EMUL_PROPAGATE_FAULT; | ||
943 | } | ||
944 | 924 | ||
945 | addr = dt.address + index * 8; | 925 | addr = dt.address + index * 8; |
946 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 926 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, |
947 | if (ret == X86EMUL_PROPAGATE_FAULT) | 927 | &ctxt->exception); |
948 | emulate_pf(ctxt); | ||
949 | 928 | ||
950 | return ret; | 929 | return ret; |
951 | } | 930 | } |
@@ -1092,7 +1071,6 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1092 | { | 1071 | { |
1093 | int rc; | 1072 | int rc; |
1094 | struct decode_cache *c = &ctxt->decode; | 1073 | struct decode_cache *c = &ctxt->decode; |
1095 | u32 err; | ||
1096 | 1074 | ||
1097 | switch (c->dst.type) { | 1075 | switch (c->dst.type) { |
1098 | case OP_REG: | 1076 | case OP_REG: |
@@ -1101,21 +1079,19 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1101 | case OP_MEM: | 1079 | case OP_MEM: |
1102 | if (c->lock_prefix) | 1080 | if (c->lock_prefix) |
1103 | rc = ops->cmpxchg_emulated( | 1081 | rc = ops->cmpxchg_emulated( |
1104 | c->dst.addr.mem, | 1082 | linear(ctxt, c->dst.addr.mem), |
1105 | &c->dst.orig_val, | 1083 | &c->dst.orig_val, |
1106 | &c->dst.val, | 1084 | &c->dst.val, |
1107 | c->dst.bytes, | 1085 | c->dst.bytes, |
1108 | &err, | 1086 | &ctxt->exception, |
1109 | ctxt->vcpu); | 1087 | ctxt->vcpu); |
1110 | else | 1088 | else |
1111 | rc = ops->write_emulated( | 1089 | rc = ops->write_emulated( |
1112 | c->dst.addr.mem, | 1090 | linear(ctxt, c->dst.addr.mem), |
1113 | &c->dst.val, | 1091 | &c->dst.val, |
1114 | c->dst.bytes, | 1092 | c->dst.bytes, |
1115 | &err, | 1093 | &ctxt->exception, |
1116 | ctxt->vcpu); | 1094 | ctxt->vcpu); |
1117 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1118 | emulate_pf(ctxt); | ||
1119 | if (rc != X86EMUL_CONTINUE) | 1095 | if (rc != X86EMUL_CONTINUE) |
1120 | return rc; | 1096 | return rc; |
1121 | break; | 1097 | break; |
@@ -1137,8 +1113,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | |||
1137 | c->dst.bytes = c->op_bytes; | 1113 | c->dst.bytes = c->op_bytes; |
1138 | c->dst.val = c->src.val; | 1114 | c->dst.val = c->src.val; |
1139 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1115 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1140 | c->dst.addr.mem = register_address(c, ss_base(ctxt, ops), | 1116 | c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1141 | c->regs[VCPU_REGS_RSP]); | 1117 | c->dst.addr.mem.seg = VCPU_SREG_SS; |
1142 | } | 1118 | } |
1143 | 1119 | ||
1144 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1120 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
@@ -1147,10 +1123,11 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1147 | { | 1123 | { |
1148 | struct decode_cache *c = &ctxt->decode; | 1124 | struct decode_cache *c = &ctxt->decode; |
1149 | int rc; | 1125 | int rc; |
1126 | struct segmented_address addr; | ||
1150 | 1127 | ||
1151 | rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), | 1128 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1152 | c->regs[VCPU_REGS_RSP]), | 1129 | addr.seg = VCPU_SREG_SS; |
1153 | dest, len); | 1130 | rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); |
1154 | if (rc != X86EMUL_CONTINUE) | 1131 | if (rc != X86EMUL_CONTINUE) |
1155 | return rc; | 1132 | return rc; |
1156 | 1133 | ||
@@ -1184,10 +1161,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1184 | change_mask |= EFLG_IF; | 1161 | change_mask |= EFLG_IF; |
1185 | break; | 1162 | break; |
1186 | case X86EMUL_MODE_VM86: | 1163 | case X86EMUL_MODE_VM86: |
1187 | if (iopl < 3) { | 1164 | if (iopl < 3) |
1188 | emulate_gp(ctxt, 0); | 1165 | return emulate_gp(ctxt, 0); |
1189 | return X86EMUL_PROPAGATE_FAULT; | ||
1190 | } | ||
1191 | change_mask |= EFLG_IF; | 1166 | change_mask |= EFLG_IF; |
1192 | break; | 1167 | break; |
1193 | default: /* real mode */ | 1168 | default: /* real mode */ |
@@ -1198,9 +1173,6 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1198 | *(unsigned long *)dest = | 1173 | *(unsigned long *)dest = |
1199 | (ctxt->eflags & ~change_mask) | (val & change_mask); | 1174 | (ctxt->eflags & ~change_mask) | (val & change_mask); |
1200 | 1175 | ||
1201 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1202 | emulate_pf(ctxt); | ||
1203 | |||
1204 | return rc; | 1176 | return rc; |
1205 | } | 1177 | } |
1206 | 1178 | ||
@@ -1287,7 +1259,6 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1287 | gva_t cs_addr; | 1259 | gva_t cs_addr; |
1288 | gva_t eip_addr; | 1260 | gva_t eip_addr; |
1289 | u16 cs, eip; | 1261 | u16 cs, eip; |
1290 | u32 err; | ||
1291 | 1262 | ||
1292 | /* TODO: Add limit checks */ | 1263 | /* TODO: Add limit checks */ |
1293 | c->src.val = ctxt->eflags; | 1264 | c->src.val = ctxt->eflags; |
@@ -1317,11 +1288,11 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1317 | eip_addr = dt.address + (irq << 2); | 1288 | eip_addr = dt.address + (irq << 2); |
1318 | cs_addr = dt.address + (irq << 2) + 2; | 1289 | cs_addr = dt.address + (irq << 2) + 2; |
1319 | 1290 | ||
1320 | rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &err); | 1291 | rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); |
1321 | if (rc != X86EMUL_CONTINUE) | 1292 | if (rc != X86EMUL_CONTINUE) |
1322 | return rc; | 1293 | return rc; |
1323 | 1294 | ||
1324 | rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &err); | 1295 | rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); |
1325 | if (rc != X86EMUL_CONTINUE) | 1296 | if (rc != X86EMUL_CONTINUE) |
1326 | return rc; | 1297 | return rc; |
1327 | 1298 | ||
@@ -1370,10 +1341,8 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1370 | if (rc != X86EMUL_CONTINUE) | 1341 | if (rc != X86EMUL_CONTINUE) |
1371 | return rc; | 1342 | return rc; |
1372 | 1343 | ||
1373 | if (temp_eip & ~0xffff) { | 1344 | if (temp_eip & ~0xffff) |
1374 | emulate_gp(ctxt, 0); | 1345 | return emulate_gp(ctxt, 0); |
1375 | return X86EMUL_PROPAGATE_FAULT; | ||
1376 | } | ||
1377 | 1346 | ||
1378 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1347 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1379 | 1348 | ||
@@ -1624,10 +1593,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1624 | 1593 | ||
1625 | /* syscall is not available in real mode */ | 1594 | /* syscall is not available in real mode */ |
1626 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1595 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1627 | ctxt->mode == X86EMUL_MODE_VM86) { | 1596 | ctxt->mode == X86EMUL_MODE_VM86) |
1628 | emulate_ud(ctxt); | 1597 | return emulate_ud(ctxt); |
1629 | return X86EMUL_PROPAGATE_FAULT; | ||
1630 | } | ||
1631 | 1598 | ||
1632 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1599 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1633 | 1600 | ||
@@ -1678,34 +1645,26 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1678 | u16 cs_sel, ss_sel; | 1645 | u16 cs_sel, ss_sel; |
1679 | 1646 | ||
1680 | /* inject #GP if in real mode */ | 1647 | /* inject #GP if in real mode */ |
1681 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1648 | if (ctxt->mode == X86EMUL_MODE_REAL) |
1682 | emulate_gp(ctxt, 0); | 1649 | return emulate_gp(ctxt, 0); |
1683 | return X86EMUL_PROPAGATE_FAULT; | ||
1684 | } | ||
1685 | 1650 | ||
1686 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1651 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1687 | * Therefore, we inject an #UD. | 1652 | * Therefore, we inject an #UD. |
1688 | */ | 1653 | */ |
1689 | if (ctxt->mode == X86EMUL_MODE_PROT64) { | 1654 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1690 | emulate_ud(ctxt); | 1655 | return emulate_ud(ctxt); |
1691 | return X86EMUL_PROPAGATE_FAULT; | ||
1692 | } | ||
1693 | 1656 | ||
1694 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1657 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1695 | 1658 | ||
1696 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 1659 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
1697 | switch (ctxt->mode) { | 1660 | switch (ctxt->mode) { |
1698 | case X86EMUL_MODE_PROT32: | 1661 | case X86EMUL_MODE_PROT32: |
1699 | if ((msr_data & 0xfffc) == 0x0) { | 1662 | if ((msr_data & 0xfffc) == 0x0) |
1700 | emulate_gp(ctxt, 0); | 1663 | return emulate_gp(ctxt, 0); |
1701 | return X86EMUL_PROPAGATE_FAULT; | ||
1702 | } | ||
1703 | break; | 1664 | break; |
1704 | case X86EMUL_MODE_PROT64: | 1665 | case X86EMUL_MODE_PROT64: |
1705 | if (msr_data == 0x0) { | 1666 | if (msr_data == 0x0) |
1706 | emulate_gp(ctxt, 0); | 1667 | return emulate_gp(ctxt, 0); |
1707 | return X86EMUL_PROPAGATE_FAULT; | ||
1708 | } | ||
1709 | break; | 1668 | break; |
1710 | } | 1669 | } |
1711 | 1670 | ||
@@ -1745,10 +1704,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1745 | 1704 | ||
1746 | /* inject #GP if in real mode or Virtual 8086 mode */ | 1705 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1747 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1706 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1748 | ctxt->mode == X86EMUL_MODE_VM86) { | 1707 | ctxt->mode == X86EMUL_MODE_VM86) |
1749 | emulate_gp(ctxt, 0); | 1708 | return emulate_gp(ctxt, 0); |
1750 | return X86EMUL_PROPAGATE_FAULT; | ||
1751 | } | ||
1752 | 1709 | ||
1753 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1710 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1754 | 1711 | ||
@@ -1763,18 +1720,14 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1763 | switch (usermode) { | 1720 | switch (usermode) { |
1764 | case X86EMUL_MODE_PROT32: | 1721 | case X86EMUL_MODE_PROT32: |
1765 | cs_sel = (u16)(msr_data + 16); | 1722 | cs_sel = (u16)(msr_data + 16); |
1766 | if ((msr_data & 0xfffc) == 0x0) { | 1723 | if ((msr_data & 0xfffc) == 0x0) |
1767 | emulate_gp(ctxt, 0); | 1724 | return emulate_gp(ctxt, 0); |
1768 | return X86EMUL_PROPAGATE_FAULT; | ||
1769 | } | ||
1770 | ss_sel = (u16)(msr_data + 24); | 1725 | ss_sel = (u16)(msr_data + 24); |
1771 | break; | 1726 | break; |
1772 | case X86EMUL_MODE_PROT64: | 1727 | case X86EMUL_MODE_PROT64: |
1773 | cs_sel = (u16)(msr_data + 32); | 1728 | cs_sel = (u16)(msr_data + 32); |
1774 | if (msr_data == 0x0) { | 1729 | if (msr_data == 0x0) |
1775 | emulate_gp(ctxt, 0); | 1730 | return emulate_gp(ctxt, 0); |
1776 | return X86EMUL_PROPAGATE_FAULT; | ||
1777 | } | ||
1778 | ss_sel = cs_sel + 8; | 1731 | ss_sel = cs_sel + 8; |
1779 | cs.d = 0; | 1732 | cs.d = 0; |
1780 | cs.l = 1; | 1733 | cs.l = 1; |
@@ -1934,33 +1887,27 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1934 | { | 1887 | { |
1935 | struct tss_segment_16 tss_seg; | 1888 | struct tss_segment_16 tss_seg; |
1936 | int ret; | 1889 | int ret; |
1937 | u32 err, new_tss_base = get_desc_base(new_desc); | 1890 | u32 new_tss_base = get_desc_base(new_desc); |
1938 | 1891 | ||
1939 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 1892 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
1940 | &err); | 1893 | &ctxt->exception); |
1941 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1894 | if (ret != X86EMUL_CONTINUE) |
1942 | /* FIXME: need to provide precise fault address */ | 1895 | /* FIXME: need to provide precise fault address */ |
1943 | emulate_pf(ctxt); | ||
1944 | return ret; | 1896 | return ret; |
1945 | } | ||
1946 | 1897 | ||
1947 | save_state_to_tss16(ctxt, ops, &tss_seg); | 1898 | save_state_to_tss16(ctxt, ops, &tss_seg); |
1948 | 1899 | ||
1949 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 1900 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
1950 | &err); | 1901 | &ctxt->exception); |
1951 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1902 | if (ret != X86EMUL_CONTINUE) |
1952 | /* FIXME: need to provide precise fault address */ | 1903 | /* FIXME: need to provide precise fault address */ |
1953 | emulate_pf(ctxt); | ||
1954 | return ret; | 1904 | return ret; |
1955 | } | ||
1956 | 1905 | ||
1957 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 1906 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
1958 | &err); | 1907 | &ctxt->exception); |
1959 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1908 | if (ret != X86EMUL_CONTINUE) |
1960 | /* FIXME: need to provide precise fault address */ | 1909 | /* FIXME: need to provide precise fault address */ |
1961 | emulate_pf(ctxt); | ||
1962 | return ret; | 1910 | return ret; |
1963 | } | ||
1964 | 1911 | ||
1965 | if (old_tss_sel != 0xffff) { | 1912 | if (old_tss_sel != 0xffff) { |
1966 | tss_seg.prev_task_link = old_tss_sel; | 1913 | tss_seg.prev_task_link = old_tss_sel; |
@@ -1968,12 +1915,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1968 | ret = ops->write_std(new_tss_base, | 1915 | ret = ops->write_std(new_tss_base, |
1969 | &tss_seg.prev_task_link, | 1916 | &tss_seg.prev_task_link, |
1970 | sizeof tss_seg.prev_task_link, | 1917 | sizeof tss_seg.prev_task_link, |
1971 | ctxt->vcpu, &err); | 1918 | ctxt->vcpu, &ctxt->exception); |
1972 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1919 | if (ret != X86EMUL_CONTINUE) |
1973 | /* FIXME: need to provide precise fault address */ | 1920 | /* FIXME: need to provide precise fault address */ |
1974 | emulate_pf(ctxt); | ||
1975 | return ret; | 1921 | return ret; |
1976 | } | ||
1977 | } | 1922 | } |
1978 | 1923 | ||
1979 | return load_state_from_tss16(ctxt, ops, &tss_seg); | 1924 | return load_state_from_tss16(ctxt, ops, &tss_seg); |
@@ -2013,10 +1958,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2013 | struct decode_cache *c = &ctxt->decode; | 1958 | struct decode_cache *c = &ctxt->decode; |
2014 | int ret; | 1959 | int ret; |
2015 | 1960 | ||
2016 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { | 1961 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) |
2017 | emulate_gp(ctxt, 0); | 1962 | return emulate_gp(ctxt, 0); |
2018 | return X86EMUL_PROPAGATE_FAULT; | ||
2019 | } | ||
2020 | c->eip = tss->eip; | 1963 | c->eip = tss->eip; |
2021 | ctxt->eflags = tss->eflags | 2; | 1964 | ctxt->eflags = tss->eflags | 2; |
2022 | c->regs[VCPU_REGS_RAX] = tss->eax; | 1965 | c->regs[VCPU_REGS_RAX] = tss->eax; |
@@ -2076,33 +2019,27 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2076 | { | 2019 | { |
2077 | struct tss_segment_32 tss_seg; | 2020 | struct tss_segment_32 tss_seg; |
2078 | int ret; | 2021 | int ret; |
2079 | u32 err, new_tss_base = get_desc_base(new_desc); | 2022 | u32 new_tss_base = get_desc_base(new_desc); |
2080 | 2023 | ||
2081 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2024 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
2082 | &err); | 2025 | &ctxt->exception); |
2083 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2026 | if (ret != X86EMUL_CONTINUE) |
2084 | /* FIXME: need to provide precise fault address */ | 2027 | /* FIXME: need to provide precise fault address */ |
2085 | emulate_pf(ctxt); | ||
2086 | return ret; | 2028 | return ret; |
2087 | } | ||
2088 | 2029 | ||
2089 | save_state_to_tss32(ctxt, ops, &tss_seg); | 2030 | save_state_to_tss32(ctxt, ops, &tss_seg); |
2090 | 2031 | ||
2091 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2032 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
2092 | &err); | 2033 | &ctxt->exception); |
2093 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2034 | if (ret != X86EMUL_CONTINUE) |
2094 | /* FIXME: need to provide precise fault address */ | 2035 | /* FIXME: need to provide precise fault address */ |
2095 | emulate_pf(ctxt); | ||
2096 | return ret; | 2036 | return ret; |
2097 | } | ||
2098 | 2037 | ||
2099 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2038 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
2100 | &err); | 2039 | &ctxt->exception); |
2101 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2040 | if (ret != X86EMUL_CONTINUE) |
2102 | /* FIXME: need to provide precise fault address */ | 2041 | /* FIXME: need to provide precise fault address */ |
2103 | emulate_pf(ctxt); | ||
2104 | return ret; | 2042 | return ret; |
2105 | } | ||
2106 | 2043 | ||
2107 | if (old_tss_sel != 0xffff) { | 2044 | if (old_tss_sel != 0xffff) { |
2108 | tss_seg.prev_task_link = old_tss_sel; | 2045 | tss_seg.prev_task_link = old_tss_sel; |
@@ -2110,12 +2047,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2110 | ret = ops->write_std(new_tss_base, | 2047 | ret = ops->write_std(new_tss_base, |
2111 | &tss_seg.prev_task_link, | 2048 | &tss_seg.prev_task_link, |
2112 | sizeof tss_seg.prev_task_link, | 2049 | sizeof tss_seg.prev_task_link, |
2113 | ctxt->vcpu, &err); | 2050 | ctxt->vcpu, &ctxt->exception); |
2114 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2051 | if (ret != X86EMUL_CONTINUE) |
2115 | /* FIXME: need to provide precise fault address */ | 2052 | /* FIXME: need to provide precise fault address */ |
2116 | emulate_pf(ctxt); | ||
2117 | return ret; | 2053 | return ret; |
2118 | } | ||
2119 | } | 2054 | } |
2120 | 2055 | ||
2121 | return load_state_from_tss32(ctxt, ops, &tss_seg); | 2056 | return load_state_from_tss32(ctxt, ops, &tss_seg); |
@@ -2146,10 +2081,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2146 | 2081 | ||
2147 | if (reason != TASK_SWITCH_IRET) { | 2082 | if (reason != TASK_SWITCH_IRET) { |
2148 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2083 | if ((tss_selector & 3) > next_tss_desc.dpl || |
2149 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | 2084 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) |
2150 | emulate_gp(ctxt, 0); | 2085 | return emulate_gp(ctxt, 0); |
2151 | return X86EMUL_PROPAGATE_FAULT; | ||
2152 | } | ||
2153 | } | 2086 | } |
2154 | 2087 | ||
2155 | desc_limit = desc_limit_scaled(&next_tss_desc); | 2088 | desc_limit = desc_limit_scaled(&next_tss_desc); |
@@ -2231,14 +2164,15 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2231 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2164 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
2232 | } | 2165 | } |
2233 | 2166 | ||
2234 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | 2167 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, |
2235 | int reg, struct operand *op) | 2168 | int reg, struct operand *op) |
2236 | { | 2169 | { |
2237 | struct decode_cache *c = &ctxt->decode; | 2170 | struct decode_cache *c = &ctxt->decode; |
2238 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | 2171 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; |
2239 | 2172 | ||
2240 | register_address_increment(c, &c->regs[reg], df * op->bytes); | 2173 | register_address_increment(c, &c->regs[reg], df * op->bytes); |
2241 | op->addr.mem = register_address(c, base, c->regs[reg]); | 2174 | op->addr.mem.ea = register_address(c, c->regs[reg]); |
2175 | op->addr.mem.seg = seg; | ||
2242 | } | 2176 | } |
2243 | 2177 | ||
2244 | static int em_push(struct x86_emulate_ctxt *ctxt) | 2178 | static int em_push(struct x86_emulate_ctxt *ctxt) |
@@ -2369,10 +2303,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
2369 | struct decode_cache *c = &ctxt->decode; | 2303 | struct decode_cache *c = &ctxt->decode; |
2370 | u64 tsc = 0; | 2304 | u64 tsc = 0; |
2371 | 2305 | ||
2372 | if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) { | 2306 | if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) |
2373 | emulate_gp(ctxt, 0); | 2307 | return emulate_gp(ctxt, 0); |
2374 | return X86EMUL_PROPAGATE_FAULT; | ||
2375 | } | ||
2376 | ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); | 2308 | ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); |
2377 | c->regs[VCPU_REGS_RAX] = (u32)tsc; | 2309 | c->regs[VCPU_REGS_RAX] = (u32)tsc; |
2378 | c->regs[VCPU_REGS_RDX] = tsc >> 32; | 2310 | c->regs[VCPU_REGS_RDX] = tsc >> 32; |
@@ -2647,7 +2579,7 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
2647 | 2579 | ||
2648 | op->type = OP_IMM; | 2580 | op->type = OP_IMM; |
2649 | op->bytes = size; | 2581 | op->bytes = size; |
2650 | op->addr.mem = c->eip; | 2582 | op->addr.mem.ea = c->eip; |
2651 | /* NB. Immediates are sign-extended as necessary. */ | 2583 | /* NB. Immediates are sign-extended as necessary. */ |
2652 | switch (op->bytes) { | 2584 | switch (op->bytes) { |
2653 | case 1: | 2585 | case 1: |
@@ -2678,7 +2610,7 @@ done: | |||
2678 | } | 2610 | } |
2679 | 2611 | ||
2680 | int | 2612 | int |
2681 | x86_decode_insn(struct x86_emulate_ctxt *ctxt) | 2613 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) |
2682 | { | 2614 | { |
2683 | struct x86_emulate_ops *ops = ctxt->ops; | 2615 | struct x86_emulate_ops *ops = ctxt->ops; |
2684 | struct decode_cache *c = &ctxt->decode; | 2616 | struct decode_cache *c = &ctxt->decode; |
@@ -2689,7 +2621,10 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt) | |||
2689 | struct operand memop = { .type = OP_NONE }; | 2621 | struct operand memop = { .type = OP_NONE }; |
2690 | 2622 | ||
2691 | c->eip = ctxt->eip; | 2623 | c->eip = ctxt->eip; |
2692 | c->fetch.start = c->fetch.end = c->eip; | 2624 | c->fetch.start = c->eip; |
2625 | c->fetch.end = c->fetch.start + insn_len; | ||
2626 | if (insn_len > 0) | ||
2627 | memcpy(c->fetch.data, insn, insn_len); | ||
2693 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); | 2628 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); |
2694 | 2629 | ||
2695 | switch (mode) { | 2630 | switch (mode) { |
@@ -2803,10 +2738,8 @@ done_prefixes: | |||
2803 | c->execute = opcode.u.execute; | 2738 | c->execute = opcode.u.execute; |
2804 | 2739 | ||
2805 | /* Unrecognised? */ | 2740 | /* Unrecognised? */ |
2806 | if (c->d == 0 || (c->d & Undefined)) { | 2741 | if (c->d == 0 || (c->d & Undefined)) |
2807 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
2808 | return -1; | 2742 | return -1; |
2809 | } | ||
2810 | 2743 | ||
2811 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 2744 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) |
2812 | c->op_bytes = 8; | 2745 | c->op_bytes = 8; |
@@ -2831,14 +2764,13 @@ done_prefixes: | |||
2831 | if (!c->has_seg_override) | 2764 | if (!c->has_seg_override) |
2832 | set_seg_override(c, VCPU_SREG_DS); | 2765 | set_seg_override(c, VCPU_SREG_DS); |
2833 | 2766 | ||
2834 | if (memop.type == OP_MEM && !(!c->twobyte && c->b == 0x8d)) | 2767 | memop.addr.mem.seg = seg_override(ctxt, ops, c); |
2835 | memop.addr.mem += seg_override_base(ctxt, ops, c); | ||
2836 | 2768 | ||
2837 | if (memop.type == OP_MEM && c->ad_bytes != 8) | 2769 | if (memop.type == OP_MEM && c->ad_bytes != 8) |
2838 | memop.addr.mem = (u32)memop.addr.mem; | 2770 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; |
2839 | 2771 | ||
2840 | if (memop.type == OP_MEM && c->rip_relative) | 2772 | if (memop.type == OP_MEM && c->rip_relative) |
2841 | memop.addr.mem += c->eip; | 2773 | memop.addr.mem.ea += c->eip; |
2842 | 2774 | ||
2843 | /* | 2775 | /* |
2844 | * Decode and fetch the source operand: register, memory | 2776 | * Decode and fetch the source operand: register, memory |
@@ -2890,14 +2822,14 @@ done_prefixes: | |||
2890 | case SrcSI: | 2822 | case SrcSI: |
2891 | c->src.type = OP_MEM; | 2823 | c->src.type = OP_MEM; |
2892 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2824 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2893 | c->src.addr.mem = | 2825 | c->src.addr.mem.ea = |
2894 | register_address(c, seg_override_base(ctxt, ops, c), | 2826 | register_address(c, c->regs[VCPU_REGS_RSI]); |
2895 | c->regs[VCPU_REGS_RSI]); | 2827 | c->src.addr.mem.seg = seg_override(ctxt, ops, c), |
2896 | c->src.val = 0; | 2828 | c->src.val = 0; |
2897 | break; | 2829 | break; |
2898 | case SrcImmFAddr: | 2830 | case SrcImmFAddr: |
2899 | c->src.type = OP_IMM; | 2831 | c->src.type = OP_IMM; |
2900 | c->src.addr.mem = c->eip; | 2832 | c->src.addr.mem.ea = c->eip; |
2901 | c->src.bytes = c->op_bytes + 2; | 2833 | c->src.bytes = c->op_bytes + 2; |
2902 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); | 2834 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); |
2903 | break; | 2835 | break; |
@@ -2944,7 +2876,7 @@ done_prefixes: | |||
2944 | break; | 2876 | break; |
2945 | case DstImmUByte: | 2877 | case DstImmUByte: |
2946 | c->dst.type = OP_IMM; | 2878 | c->dst.type = OP_IMM; |
2947 | c->dst.addr.mem = c->eip; | 2879 | c->dst.addr.mem.ea = c->eip; |
2948 | c->dst.bytes = 1; | 2880 | c->dst.bytes = 1; |
2949 | c->dst.val = insn_fetch(u8, 1, c->eip); | 2881 | c->dst.val = insn_fetch(u8, 1, c->eip); |
2950 | break; | 2882 | break; |
@@ -2969,9 +2901,9 @@ done_prefixes: | |||
2969 | case DstDI: | 2901 | case DstDI: |
2970 | c->dst.type = OP_MEM; | 2902 | c->dst.type = OP_MEM; |
2971 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2903 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2972 | c->dst.addr.mem = | 2904 | c->dst.addr.mem.ea = |
2973 | register_address(c, es_base(ctxt, ops), | 2905 | register_address(c, c->regs[VCPU_REGS_RDI]); |
2974 | c->regs[VCPU_REGS_RDI]); | 2906 | c->dst.addr.mem.seg = VCPU_SREG_ES; |
2975 | c->dst.val = 0; | 2907 | c->dst.val = 0; |
2976 | break; | 2908 | break; |
2977 | case ImplicitOps: | 2909 | case ImplicitOps: |
@@ -3020,24 +2952,24 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3020 | ctxt->decode.mem_read.pos = 0; | 2952 | ctxt->decode.mem_read.pos = 0; |
3021 | 2953 | ||
3022 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 2954 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
3023 | emulate_ud(ctxt); | 2955 | rc = emulate_ud(ctxt); |
3024 | goto done; | 2956 | goto done; |
3025 | } | 2957 | } |
3026 | 2958 | ||
3027 | /* LOCK prefix is allowed only with some instructions */ | 2959 | /* LOCK prefix is allowed only with some instructions */ |
3028 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { | 2960 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
3029 | emulate_ud(ctxt); | 2961 | rc = emulate_ud(ctxt); |
3030 | goto done; | 2962 | goto done; |
3031 | } | 2963 | } |
3032 | 2964 | ||
3033 | if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) { | 2965 | if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) { |
3034 | emulate_ud(ctxt); | 2966 | rc = emulate_ud(ctxt); |
3035 | goto done; | 2967 | goto done; |
3036 | } | 2968 | } |
3037 | 2969 | ||
3038 | /* Privileged instruction can be executed only in CPL=0 */ | 2970 | /* Privileged instruction can be executed only in CPL=0 */ |
3039 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 2971 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
3040 | emulate_gp(ctxt, 0); | 2972 | rc = emulate_gp(ctxt, 0); |
3041 | goto done; | 2973 | goto done; |
3042 | } | 2974 | } |
3043 | 2975 | ||
@@ -3050,7 +2982,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3050 | } | 2982 | } |
3051 | 2983 | ||
3052 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { | 2984 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { |
3053 | rc = read_emulated(ctxt, ops, c->src.addr.mem, | 2985 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), |
3054 | c->src.valptr, c->src.bytes); | 2986 | c->src.valptr, c->src.bytes); |
3055 | if (rc != X86EMUL_CONTINUE) | 2987 | if (rc != X86EMUL_CONTINUE) |
3056 | goto done; | 2988 | goto done; |
@@ -3058,7 +2990,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3058 | } | 2990 | } |
3059 | 2991 | ||
3060 | if (c->src2.type == OP_MEM) { | 2992 | if (c->src2.type == OP_MEM) { |
3061 | rc = read_emulated(ctxt, ops, c->src2.addr.mem, | 2993 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), |
3062 | &c->src2.val, c->src2.bytes); | 2994 | &c->src2.val, c->src2.bytes); |
3063 | if (rc != X86EMUL_CONTINUE) | 2995 | if (rc != X86EMUL_CONTINUE) |
3064 | goto done; | 2996 | goto done; |
@@ -3070,7 +3002,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3070 | 3002 | ||
3071 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 3003 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
3072 | /* optimisation - avoid slow emulated read if Mov */ | 3004 | /* optimisation - avoid slow emulated read if Mov */ |
3073 | rc = read_emulated(ctxt, ops, c->dst.addr.mem, | 3005 | rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), |
3074 | &c->dst.val, c->dst.bytes); | 3006 | &c->dst.val, c->dst.bytes); |
3075 | if (rc != X86EMUL_CONTINUE) | 3007 | if (rc != X86EMUL_CONTINUE) |
3076 | goto done; | 3008 | goto done; |
@@ -3215,13 +3147,13 @@ special_insn: | |||
3215 | break; | 3147 | break; |
3216 | case 0x8c: /* mov r/m, sreg */ | 3148 | case 0x8c: /* mov r/m, sreg */ |
3217 | if (c->modrm_reg > VCPU_SREG_GS) { | 3149 | if (c->modrm_reg > VCPU_SREG_GS) { |
3218 | emulate_ud(ctxt); | 3150 | rc = emulate_ud(ctxt); |
3219 | goto done; | 3151 | goto done; |
3220 | } | 3152 | } |
3221 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); | 3153 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); |
3222 | break; | 3154 | break; |
3223 | case 0x8d: /* lea r16/r32, m */ | 3155 | case 0x8d: /* lea r16/r32, m */ |
3224 | c->dst.val = c->src.addr.mem; | 3156 | c->dst.val = c->src.addr.mem.ea; |
3225 | break; | 3157 | break; |
3226 | case 0x8e: { /* mov seg, r/m16 */ | 3158 | case 0x8e: { /* mov seg, r/m16 */ |
3227 | uint16_t sel; | 3159 | uint16_t sel; |
@@ -3230,7 +3162,7 @@ special_insn: | |||
3230 | 3162 | ||
3231 | if (c->modrm_reg == VCPU_SREG_CS || | 3163 | if (c->modrm_reg == VCPU_SREG_CS || |
3232 | c->modrm_reg > VCPU_SREG_GS) { | 3164 | c->modrm_reg > VCPU_SREG_GS) { |
3233 | emulate_ud(ctxt); | 3165 | rc = emulate_ud(ctxt); |
3234 | goto done; | 3166 | goto done; |
3235 | } | 3167 | } |
3236 | 3168 | ||
@@ -3268,7 +3200,6 @@ special_insn: | |||
3268 | break; | 3200 | break; |
3269 | case 0xa6 ... 0xa7: /* cmps */ | 3201 | case 0xa6 ... 0xa7: /* cmps */ |
3270 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3202 | c->dst.type = OP_NONE; /* Disable writeback. */ |
3271 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem); | ||
3272 | goto cmp; | 3203 | goto cmp; |
3273 | case 0xa8 ... 0xa9: /* test ax, imm */ | 3204 | case 0xa8 ... 0xa9: /* test ax, imm */ |
3274 | goto test; | 3205 | goto test; |
@@ -3363,7 +3294,7 @@ special_insn: | |||
3363 | do_io_in: | 3294 | do_io_in: |
3364 | c->dst.bytes = min(c->dst.bytes, 4u); | 3295 | c->dst.bytes = min(c->dst.bytes, 4u); |
3365 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 3296 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
3366 | emulate_gp(ctxt, 0); | 3297 | rc = emulate_gp(ctxt, 0); |
3367 | goto done; | 3298 | goto done; |
3368 | } | 3299 | } |
3369 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 3300 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
@@ -3377,7 +3308,7 @@ special_insn: | |||
3377 | c->src.bytes = min(c->src.bytes, 4u); | 3308 | c->src.bytes = min(c->src.bytes, 4u); |
3378 | if (!emulator_io_permited(ctxt, ops, c->dst.val, | 3309 | if (!emulator_io_permited(ctxt, ops, c->dst.val, |
3379 | c->src.bytes)) { | 3310 | c->src.bytes)) { |
3380 | emulate_gp(ctxt, 0); | 3311 | rc = emulate_gp(ctxt, 0); |
3381 | goto done; | 3312 | goto done; |
3382 | } | 3313 | } |
3383 | ops->pio_out_emulated(c->src.bytes, c->dst.val, | 3314 | ops->pio_out_emulated(c->src.bytes, c->dst.val, |
@@ -3402,14 +3333,14 @@ special_insn: | |||
3402 | break; | 3333 | break; |
3403 | case 0xfa: /* cli */ | 3334 | case 0xfa: /* cli */ |
3404 | if (emulator_bad_iopl(ctxt, ops)) { | 3335 | if (emulator_bad_iopl(ctxt, ops)) { |
3405 | emulate_gp(ctxt, 0); | 3336 | rc = emulate_gp(ctxt, 0); |
3406 | goto done; | 3337 | goto done; |
3407 | } else | 3338 | } else |
3408 | ctxt->eflags &= ~X86_EFLAGS_IF; | 3339 | ctxt->eflags &= ~X86_EFLAGS_IF; |
3409 | break; | 3340 | break; |
3410 | case 0xfb: /* sti */ | 3341 | case 0xfb: /* sti */ |
3411 | if (emulator_bad_iopl(ctxt, ops)) { | 3342 | if (emulator_bad_iopl(ctxt, ops)) { |
3412 | emulate_gp(ctxt, 0); | 3343 | rc = emulate_gp(ctxt, 0); |
3413 | goto done; | 3344 | goto done; |
3414 | } else { | 3345 | } else { |
3415 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | 3346 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; |
@@ -3449,11 +3380,11 @@ writeback: | |||
3449 | c->dst.type = saved_dst_type; | 3380 | c->dst.type = saved_dst_type; |
3450 | 3381 | ||
3451 | if ((c->d & SrcMask) == SrcSI) | 3382 | if ((c->d & SrcMask) == SrcSI) |
3452 | string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), | 3383 | string_addr_inc(ctxt, seg_override(ctxt, ops, c), |
3453 | VCPU_REGS_RSI, &c->src); | 3384 | VCPU_REGS_RSI, &c->src); |
3454 | 3385 | ||
3455 | if ((c->d & DstMask) == DstDI) | 3386 | if ((c->d & DstMask) == DstDI) |
3456 | string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, | 3387 | string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, |
3457 | &c->dst); | 3388 | &c->dst); |
3458 | 3389 | ||
3459 | if (c->rep_prefix && (c->d & String)) { | 3390 | if (c->rep_prefix && (c->d & String)) { |
@@ -3482,6 +3413,8 @@ writeback: | |||
3482 | ctxt->eip = c->eip; | 3413 | ctxt->eip = c->eip; |
3483 | 3414 | ||
3484 | done: | 3415 | done: |
3416 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
3417 | ctxt->have_exception = true; | ||
3485 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 3418 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
3486 | 3419 | ||
3487 | twobyte_insn: | 3420 | twobyte_insn: |
@@ -3544,9 +3477,11 @@ twobyte_insn: | |||
3544 | break; | 3477 | break; |
3545 | case 5: /* not defined */ | 3478 | case 5: /* not defined */ |
3546 | emulate_ud(ctxt); | 3479 | emulate_ud(ctxt); |
3480 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3547 | goto done; | 3481 | goto done; |
3548 | case 7: /* invlpg*/ | 3482 | case 7: /* invlpg*/ |
3549 | emulate_invlpg(ctxt->vcpu, c->src.addr.mem); | 3483 | emulate_invlpg(ctxt->vcpu, |
3484 | linear(ctxt, c->src.addr.mem)); | ||
3550 | /* Disable writeback. */ | 3485 | /* Disable writeback. */ |
3551 | c->dst.type = OP_NONE; | 3486 | c->dst.type = OP_NONE; |
3552 | break; | 3487 | break; |
@@ -3573,6 +3508,7 @@ twobyte_insn: | |||
3573 | case 5 ... 7: | 3508 | case 5 ... 7: |
3574 | case 9 ... 15: | 3509 | case 9 ... 15: |
3575 | emulate_ud(ctxt); | 3510 | emulate_ud(ctxt); |
3511 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3576 | goto done; | 3512 | goto done; |
3577 | } | 3513 | } |
3578 | c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); | 3514 | c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); |
@@ -3581,6 +3517,7 @@ twobyte_insn: | |||
3581 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3517 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3582 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3518 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3583 | emulate_ud(ctxt); | 3519 | emulate_ud(ctxt); |
3520 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3584 | goto done; | 3521 | goto done; |
3585 | } | 3522 | } |
3586 | ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); | 3523 | ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); |
@@ -3588,6 +3525,7 @@ twobyte_insn: | |||
3588 | case 0x22: /* mov reg, cr */ | 3525 | case 0x22: /* mov reg, cr */ |
3589 | if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { | 3526 | if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { |
3590 | emulate_gp(ctxt, 0); | 3527 | emulate_gp(ctxt, 0); |
3528 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3591 | goto done; | 3529 | goto done; |
3592 | } | 3530 | } |
3593 | c->dst.type = OP_NONE; | 3531 | c->dst.type = OP_NONE; |
@@ -3596,6 +3534,7 @@ twobyte_insn: | |||
3596 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3534 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3597 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3535 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3598 | emulate_ud(ctxt); | 3536 | emulate_ud(ctxt); |
3537 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3599 | goto done; | 3538 | goto done; |
3600 | } | 3539 | } |
3601 | 3540 | ||
@@ -3604,6 +3543,7 @@ twobyte_insn: | |||
3604 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | 3543 | ~0ULL : ~0U), ctxt->vcpu) < 0) { |
3605 | /* #UD condition is already handled by the code above */ | 3544 | /* #UD condition is already handled by the code above */ |
3606 | emulate_gp(ctxt, 0); | 3545 | emulate_gp(ctxt, 0); |
3546 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3607 | goto done; | 3547 | goto done; |
3608 | } | 3548 | } |
3609 | 3549 | ||
@@ -3615,6 +3555,7 @@ twobyte_insn: | |||
3615 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3555 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
3616 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 3556 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
3617 | emulate_gp(ctxt, 0); | 3557 | emulate_gp(ctxt, 0); |
3558 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3618 | goto done; | 3559 | goto done; |
3619 | } | 3560 | } |
3620 | rc = X86EMUL_CONTINUE; | 3561 | rc = X86EMUL_CONTINUE; |
@@ -3623,6 +3564,7 @@ twobyte_insn: | |||
3623 | /* rdmsr */ | 3564 | /* rdmsr */ |
3624 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 3565 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
3625 | emulate_gp(ctxt, 0); | 3566 | emulate_gp(ctxt, 0); |
3567 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3626 | goto done; | 3568 | goto done; |
3627 | } else { | 3569 | } else { |
3628 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3570 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
@@ -3785,6 +3727,5 @@ twobyte_insn: | |||
3785 | goto writeback; | 3727 | goto writeback; |
3786 | 3728 | ||
3787 | cannot_emulate: | 3729 | cannot_emulate: |
3788 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
3789 | return -1; | 3730 | return -1; |
3790 | } | 3731 | } |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index f628234fbeca..3cece05e4ac4 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -575,6 +575,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
575 | s->pics[1].elcr_mask = 0xde; | 575 | s->pics[1].elcr_mask = 0xde; |
576 | s->pics[0].pics_state = s; | 576 | s->pics[0].pics_state = s; |
577 | s->pics[1].pics_state = s; | 577 | s->pics[1].pics_state = s; |
578 | s->pics[0].isr_ack = 0xff; | ||
579 | s->pics[1].isr_ack = 0xff; | ||
578 | 580 | ||
579 | /* | 581 | /* |
580 | * Initialize PIO device | 582 | * Initialize PIO device |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 975bb45329a1..3377d53fcd36 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -73,6 +73,13 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) | |||
73 | return vcpu->arch.cr4 & mask; | 73 | return vcpu->arch.cr4 & mask; |
74 | } | 74 | } |
75 | 75 | ||
76 | static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) | ||
77 | { | ||
78 | if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) | ||
79 | kvm_x86_ops->decache_cr3(vcpu); | ||
80 | return vcpu->arch.cr3; | ||
81 | } | ||
82 | |||
76 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | 83 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) |
77 | { | 84 | { |
78 | return kvm_read_cr4_bits(vcpu, ~0UL); | 85 | return kvm_read_cr4_bits(vcpu, ~0UL); |
@@ -84,4 +91,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | |||
84 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); | 91 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); |
85 | } | 92 | } |
86 | 93 | ||
94 | static inline void enter_guest_mode(struct kvm_vcpu *vcpu) | ||
95 | { | ||
96 | vcpu->arch.hflags |= HF_GUEST_MASK; | ||
97 | } | ||
98 | |||
99 | static inline void leave_guest_mode(struct kvm_vcpu *vcpu) | ||
100 | { | ||
101 | vcpu->arch.hflags &= ~HF_GUEST_MASK; | ||
102 | } | ||
103 | |||
104 | static inline bool is_guest_mode(struct kvm_vcpu *vcpu) | ||
105 | { | ||
106 | return vcpu->arch.hflags & HF_GUEST_MASK; | ||
107 | } | ||
108 | |||
87 | #endif | 109 | #endif |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 413f8973a855..93cf9d0d3653 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -277,7 +277,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) | |||
277 | 277 | ||
278 | if (old_ppr != ppr) { | 278 | if (old_ppr != ppr) { |
279 | apic_set_reg(apic, APIC_PROCPRI, ppr); | 279 | apic_set_reg(apic, APIC_PROCPRI, ppr); |
280 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 280 | if (ppr < old_ppr) |
281 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | ||
281 | } | 282 | } |
282 | } | 283 | } |
283 | 284 | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fb8b376bf28c..f02b8edc3d44 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -18,9 +18,11 @@ | |||
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include "irq.h" | ||
21 | #include "mmu.h" | 22 | #include "mmu.h" |
22 | #include "x86.h" | 23 | #include "x86.h" |
23 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
25 | #include "x86.h" | ||
24 | 26 | ||
25 | #include <linux/kvm_host.h> | 27 | #include <linux/kvm_host.h> |
26 | #include <linux/types.h> | 28 | #include <linux/types.h> |
@@ -194,7 +196,6 @@ static struct percpu_counter kvm_total_used_mmu_pages; | |||
194 | 196 | ||
195 | static u64 __read_mostly shadow_trap_nonpresent_pte; | 197 | static u64 __read_mostly shadow_trap_nonpresent_pte; |
196 | static u64 __read_mostly shadow_notrap_nonpresent_pte; | 198 | static u64 __read_mostly shadow_notrap_nonpresent_pte; |
197 | static u64 __read_mostly shadow_base_present_pte; | ||
198 | static u64 __read_mostly shadow_nx_mask; | 199 | static u64 __read_mostly shadow_nx_mask; |
199 | static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | 200 | static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ |
200 | static u64 __read_mostly shadow_user_mask; | 201 | static u64 __read_mostly shadow_user_mask; |
@@ -213,12 +214,6 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | |||
213 | } | 214 | } |
214 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); | 215 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); |
215 | 216 | ||
216 | void kvm_mmu_set_base_ptes(u64 base_pte) | ||
217 | { | ||
218 | shadow_base_present_pte = base_pte; | ||
219 | } | ||
220 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | ||
221 | |||
222 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 217 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
223 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | 218 | u64 dirty_mask, u64 nx_mask, u64 x_mask) |
224 | { | 219 | { |
@@ -482,46 +477,46 @@ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) | |||
482 | } | 477 | } |
483 | 478 | ||
484 | /* | 479 | /* |
485 | * Return the pointer to the largepage write count for a given | 480 | * Return the pointer to the large page information for a given gfn, |
486 | * gfn, handling slots that are not large page aligned. | 481 | * handling slots that are not large page aligned. |
487 | */ | 482 | */ |
488 | static int *slot_largepage_idx(gfn_t gfn, | 483 | static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, |
489 | struct kvm_memory_slot *slot, | 484 | struct kvm_memory_slot *slot, |
490 | int level) | 485 | int level) |
491 | { | 486 | { |
492 | unsigned long idx; | 487 | unsigned long idx; |
493 | 488 | ||
494 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | 489 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
495 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | 490 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
496 | return &slot->lpage_info[level - 2][idx].write_count; | 491 | return &slot->lpage_info[level - 2][idx]; |
497 | } | 492 | } |
498 | 493 | ||
499 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) | 494 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) |
500 | { | 495 | { |
501 | struct kvm_memory_slot *slot; | 496 | struct kvm_memory_slot *slot; |
502 | int *write_count; | 497 | struct kvm_lpage_info *linfo; |
503 | int i; | 498 | int i; |
504 | 499 | ||
505 | slot = gfn_to_memslot(kvm, gfn); | 500 | slot = gfn_to_memslot(kvm, gfn); |
506 | for (i = PT_DIRECTORY_LEVEL; | 501 | for (i = PT_DIRECTORY_LEVEL; |
507 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 502 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
508 | write_count = slot_largepage_idx(gfn, slot, i); | 503 | linfo = lpage_info_slot(gfn, slot, i); |
509 | *write_count += 1; | 504 | linfo->write_count += 1; |
510 | } | 505 | } |
511 | } | 506 | } |
512 | 507 | ||
513 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | 508 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) |
514 | { | 509 | { |
515 | struct kvm_memory_slot *slot; | 510 | struct kvm_memory_slot *slot; |
516 | int *write_count; | 511 | struct kvm_lpage_info *linfo; |
517 | int i; | 512 | int i; |
518 | 513 | ||
519 | slot = gfn_to_memslot(kvm, gfn); | 514 | slot = gfn_to_memslot(kvm, gfn); |
520 | for (i = PT_DIRECTORY_LEVEL; | 515 | for (i = PT_DIRECTORY_LEVEL; |
521 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 516 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
522 | write_count = slot_largepage_idx(gfn, slot, i); | 517 | linfo = lpage_info_slot(gfn, slot, i); |
523 | *write_count -= 1; | 518 | linfo->write_count -= 1; |
524 | WARN_ON(*write_count < 0); | 519 | WARN_ON(linfo->write_count < 0); |
525 | } | 520 | } |
526 | } | 521 | } |
527 | 522 | ||
@@ -530,12 +525,12 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
530 | int level) | 525 | int level) |
531 | { | 526 | { |
532 | struct kvm_memory_slot *slot; | 527 | struct kvm_memory_slot *slot; |
533 | int *largepage_idx; | 528 | struct kvm_lpage_info *linfo; |
534 | 529 | ||
535 | slot = gfn_to_memslot(kvm, gfn); | 530 | slot = gfn_to_memslot(kvm, gfn); |
536 | if (slot) { | 531 | if (slot) { |
537 | largepage_idx = slot_largepage_idx(gfn, slot, level); | 532 | linfo = lpage_info_slot(gfn, slot, level); |
538 | return *largepage_idx; | 533 | return linfo->write_count; |
539 | } | 534 | } |
540 | 535 | ||
541 | return 1; | 536 | return 1; |
@@ -559,14 +554,18 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
559 | return ret; | 554 | return ret; |
560 | } | 555 | } |
561 | 556 | ||
562 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 557 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
563 | { | 558 | { |
564 | struct kvm_memory_slot *slot; | 559 | struct kvm_memory_slot *slot; |
565 | int host_level, level, max_level; | ||
566 | |||
567 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 560 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); |
568 | if (slot && slot->dirty_bitmap) | 561 | if (slot && slot->dirty_bitmap) |
569 | return PT_PAGE_TABLE_LEVEL; | 562 | return true; |
563 | return false; | ||
564 | } | ||
565 | |||
566 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
567 | { | ||
568 | int host_level, level, max_level; | ||
570 | 569 | ||
571 | host_level = host_mapping_level(vcpu->kvm, large_gfn); | 570 | host_level = host_mapping_level(vcpu->kvm, large_gfn); |
572 | 571 | ||
@@ -590,16 +589,15 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
590 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 589 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
591 | { | 590 | { |
592 | struct kvm_memory_slot *slot; | 591 | struct kvm_memory_slot *slot; |
593 | unsigned long idx; | 592 | struct kvm_lpage_info *linfo; |
594 | 593 | ||
595 | slot = gfn_to_memslot(kvm, gfn); | 594 | slot = gfn_to_memslot(kvm, gfn); |
596 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 595 | if (likely(level == PT_PAGE_TABLE_LEVEL)) |
597 | return &slot->rmap[gfn - slot->base_gfn]; | 596 | return &slot->rmap[gfn - slot->base_gfn]; |
598 | 597 | ||
599 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | 598 | linfo = lpage_info_slot(gfn, slot, level); |
600 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | ||
601 | 599 | ||
602 | return &slot->lpage_info[level - 2][idx].rmap_pde; | 600 | return &linfo->rmap_pde; |
603 | } | 601 | } |
604 | 602 | ||
605 | /* | 603 | /* |
@@ -887,19 +885,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
887 | end = start + (memslot->npages << PAGE_SHIFT); | 885 | end = start + (memslot->npages << PAGE_SHIFT); |
888 | if (hva >= start && hva < end) { | 886 | if (hva >= start && hva < end) { |
889 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 887 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
888 | gfn_t gfn = memslot->base_gfn + gfn_offset; | ||
890 | 889 | ||
891 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 890 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
892 | 891 | ||
893 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 892 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
894 | unsigned long idx; | 893 | struct kvm_lpage_info *linfo; |
895 | int sh; | 894 | |
896 | 895 | linfo = lpage_info_slot(gfn, memslot, | |
897 | sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); | 896 | PT_DIRECTORY_LEVEL + j); |
898 | idx = ((memslot->base_gfn+gfn_offset) >> sh) - | 897 | ret |= handler(kvm, &linfo->rmap_pde, data); |
899 | (memslot->base_gfn >> sh); | ||
900 | ret |= handler(kvm, | ||
901 | &memslot->lpage_info[j][idx].rmap_pde, | ||
902 | data); | ||
903 | } | 898 | } |
904 | trace_kvm_age_page(hva, memslot, ret); | 899 | trace_kvm_age_page(hva, memslot, ret); |
905 | retval |= ret; | 900 | retval |= ret; |
@@ -950,6 +945,35 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
950 | return young; | 945 | return young; |
951 | } | 946 | } |
952 | 947 | ||
948 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
949 | unsigned long data) | ||
950 | { | ||
951 | u64 *spte; | ||
952 | int young = 0; | ||
953 | |||
954 | /* | ||
955 | * If there's no access bit in the secondary pte set by the | ||
956 | * hardware it's up to gup-fast/gup to set the access bit in | ||
957 | * the primary pte or in the page structure. | ||
958 | */ | ||
959 | if (!shadow_accessed_mask) | ||
960 | goto out; | ||
961 | |||
962 | spte = rmap_next(kvm, rmapp, NULL); | ||
963 | while (spte) { | ||
964 | u64 _spte = *spte; | ||
965 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
966 | young = _spte & PT_ACCESSED_MASK; | ||
967 | if (young) { | ||
968 | young = 1; | ||
969 | break; | ||
970 | } | ||
971 | spte = rmap_next(kvm, rmapp, spte); | ||
972 | } | ||
973 | out: | ||
974 | return young; | ||
975 | } | ||
976 | |||
953 | #define RMAP_RECYCLE_THRESHOLD 1000 | 977 | #define RMAP_RECYCLE_THRESHOLD 1000 |
954 | 978 | ||
955 | static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 979 | static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
@@ -970,6 +994,11 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva) | |||
970 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); | 994 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
971 | } | 995 | } |
972 | 996 | ||
997 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
998 | { | ||
999 | return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp); | ||
1000 | } | ||
1001 | |||
973 | #ifdef MMU_DEBUG | 1002 | #ifdef MMU_DEBUG |
974 | static int is_empty_shadow_page(u64 *spt) | 1003 | static int is_empty_shadow_page(u64 *spt) |
975 | { | 1004 | { |
@@ -1161,7 +1190,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
1161 | } | 1190 | } |
1162 | 1191 | ||
1163 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1192 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1164 | struct kvm_mmu_page *sp, bool clear_unsync) | 1193 | struct kvm_mmu_page *sp) |
1165 | { | 1194 | { |
1166 | return 1; | 1195 | return 1; |
1167 | } | 1196 | } |
@@ -1291,7 +1320,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1291 | if (clear_unsync) | 1320 | if (clear_unsync) |
1292 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1321 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1293 | 1322 | ||
1294 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { | 1323 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { |
1295 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); | 1324 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1296 | return 1; | 1325 | return 1; |
1297 | } | 1326 | } |
@@ -1332,12 +1361,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
1332 | continue; | 1361 | continue; |
1333 | 1362 | ||
1334 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | 1363 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); |
1364 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
1335 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | 1365 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || |
1336 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { | 1366 | (vcpu->arch.mmu.sync_page(vcpu, s))) { |
1337 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | 1367 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); |
1338 | continue; | 1368 | continue; |
1339 | } | 1369 | } |
1340 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
1341 | flush = true; | 1370 | flush = true; |
1342 | } | 1371 | } |
1343 | 1372 | ||
@@ -1963,9 +1992,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1963 | unsigned pte_access, int user_fault, | 1992 | unsigned pte_access, int user_fault, |
1964 | int write_fault, int dirty, int level, | 1993 | int write_fault, int dirty, int level, |
1965 | gfn_t gfn, pfn_t pfn, bool speculative, | 1994 | gfn_t gfn, pfn_t pfn, bool speculative, |
1966 | bool can_unsync, bool reset_host_protection) | 1995 | bool can_unsync, bool host_writable) |
1967 | { | 1996 | { |
1968 | u64 spte; | 1997 | u64 spte, entry = *sptep; |
1969 | int ret = 0; | 1998 | int ret = 0; |
1970 | 1999 | ||
1971 | /* | 2000 | /* |
@@ -1973,7 +2002,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1973 | * whether the guest actually used the pte (in order to detect | 2002 | * whether the guest actually used the pte (in order to detect |
1974 | * demand paging). | 2003 | * demand paging). |
1975 | */ | 2004 | */ |
1976 | spte = shadow_base_present_pte; | 2005 | spte = PT_PRESENT_MASK; |
1977 | if (!speculative) | 2006 | if (!speculative) |
1978 | spte |= shadow_accessed_mask; | 2007 | spte |= shadow_accessed_mask; |
1979 | if (!dirty) | 2008 | if (!dirty) |
@@ -1990,8 +2019,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1990 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 2019 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1991 | kvm_is_mmio_pfn(pfn)); | 2020 | kvm_is_mmio_pfn(pfn)); |
1992 | 2021 | ||
1993 | if (reset_host_protection) | 2022 | if (host_writable) |
1994 | spte |= SPTE_HOST_WRITEABLE; | 2023 | spte |= SPTE_HOST_WRITEABLE; |
2024 | else | ||
2025 | pte_access &= ~ACC_WRITE_MASK; | ||
1995 | 2026 | ||
1996 | spte |= (u64)pfn << PAGE_SHIFT; | 2027 | spte |= (u64)pfn << PAGE_SHIFT; |
1997 | 2028 | ||
@@ -2036,6 +2067,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2036 | 2067 | ||
2037 | set_pte: | 2068 | set_pte: |
2038 | update_spte(sptep, spte); | 2069 | update_spte(sptep, spte); |
2070 | /* | ||
2071 | * If we overwrite a writable spte with a read-only one we | ||
2072 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
2073 | * will find a read-only spte, even though the writable spte | ||
2074 | * might be cached on a CPU's TLB. | ||
2075 | */ | ||
2076 | if (is_writable_pte(entry) && !is_writable_pte(*sptep)) | ||
2077 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
2039 | done: | 2078 | done: |
2040 | return ret; | 2079 | return ret; |
2041 | } | 2080 | } |
@@ -2045,7 +2084,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2045 | int user_fault, int write_fault, int dirty, | 2084 | int user_fault, int write_fault, int dirty, |
2046 | int *ptwrite, int level, gfn_t gfn, | 2085 | int *ptwrite, int level, gfn_t gfn, |
2047 | pfn_t pfn, bool speculative, | 2086 | pfn_t pfn, bool speculative, |
2048 | bool reset_host_protection) | 2087 | bool host_writable) |
2049 | { | 2088 | { |
2050 | int was_rmapped = 0; | 2089 | int was_rmapped = 0; |
2051 | int rmap_count; | 2090 | int rmap_count; |
@@ -2080,7 +2119,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2080 | 2119 | ||
2081 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 2120 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
2082 | dirty, level, gfn, pfn, speculative, true, | 2121 | dirty, level, gfn, pfn, speculative, true, |
2083 | reset_host_protection)) { | 2122 | host_writable)) { |
2084 | if (write_fault) | 2123 | if (write_fault) |
2085 | *ptwrite = 1; | 2124 | *ptwrite = 1; |
2086 | kvm_mmu_flush_tlb(vcpu); | 2125 | kvm_mmu_flush_tlb(vcpu); |
@@ -2211,7 +2250,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) | |||
2211 | } | 2250 | } |
2212 | 2251 | ||
2213 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | 2252 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, |
2214 | int level, gfn_t gfn, pfn_t pfn) | 2253 | int map_writable, int level, gfn_t gfn, pfn_t pfn, |
2254 | bool prefault) | ||
2215 | { | 2255 | { |
2216 | struct kvm_shadow_walk_iterator iterator; | 2256 | struct kvm_shadow_walk_iterator iterator; |
2217 | struct kvm_mmu_page *sp; | 2257 | struct kvm_mmu_page *sp; |
@@ -2220,9 +2260,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2220 | 2260 | ||
2221 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2261 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2222 | if (iterator.level == level) { | 2262 | if (iterator.level == level) { |
2223 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 2263 | unsigned pte_access = ACC_ALL; |
2264 | |||
2265 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, | ||
2224 | 0, write, 1, &pt_write, | 2266 | 0, write, 1, &pt_write, |
2225 | level, gfn, pfn, false, true); | 2267 | level, gfn, pfn, prefault, map_writable); |
2226 | direct_pte_prefetch(vcpu, iterator.sptep); | 2268 | direct_pte_prefetch(vcpu, iterator.sptep); |
2227 | ++vcpu->stat.pf_fixed; | 2269 | ++vcpu->stat.pf_fixed; |
2228 | break; | 2270 | break; |
@@ -2277,27 +2319,81 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | |||
2277 | return 1; | 2319 | return 1; |
2278 | } | 2320 | } |
2279 | 2321 | ||
2280 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 2322 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, |
2323 | gfn_t *gfnp, pfn_t *pfnp, int *levelp) | ||
2324 | { | ||
2325 | pfn_t pfn = *pfnp; | ||
2326 | gfn_t gfn = *gfnp; | ||
2327 | int level = *levelp; | ||
2328 | |||
2329 | /* | ||
2330 | * Check if it's a transparent hugepage. If this would be an | ||
2331 | * hugetlbfs page, level wouldn't be set to | ||
2332 | * PT_PAGE_TABLE_LEVEL and there would be no adjustment done | ||
2333 | * here. | ||
2334 | */ | ||
2335 | if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && | ||
2336 | level == PT_PAGE_TABLE_LEVEL && | ||
2337 | PageTransCompound(pfn_to_page(pfn)) && | ||
2338 | !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { | ||
2339 | unsigned long mask; | ||
2340 | /* | ||
2341 | * mmu_notifier_retry was successful and we hold the | ||
2342 | * mmu_lock here, so the pmd can't become splitting | ||
2343 | * from under us, and in turn | ||
2344 | * __split_huge_page_refcount() can't run from under | ||
2345 | * us and we can safely transfer the refcount from | ||
2346 | * PG_tail to PG_head as we switch the pfn to tail to | ||
2347 | * head. | ||
2348 | */ | ||
2349 | *levelp = level = PT_DIRECTORY_LEVEL; | ||
2350 | mask = KVM_PAGES_PER_HPAGE(level) - 1; | ||
2351 | VM_BUG_ON((gfn & mask) != (pfn & mask)); | ||
2352 | if (pfn & mask) { | ||
2353 | gfn &= ~mask; | ||
2354 | *gfnp = gfn; | ||
2355 | kvm_release_pfn_clean(pfn); | ||
2356 | pfn &= ~mask; | ||
2357 | if (!get_page_unless_zero(pfn_to_page(pfn))) | ||
2358 | BUG(); | ||
2359 | *pfnp = pfn; | ||
2360 | } | ||
2361 | } | ||
2362 | } | ||
2363 | |||
2364 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | ||
2365 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | ||
2366 | |||
2367 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | ||
2368 | bool prefault) | ||
2281 | { | 2369 | { |
2282 | int r; | 2370 | int r; |
2283 | int level; | 2371 | int level; |
2372 | int force_pt_level; | ||
2284 | pfn_t pfn; | 2373 | pfn_t pfn; |
2285 | unsigned long mmu_seq; | 2374 | unsigned long mmu_seq; |
2375 | bool map_writable; | ||
2286 | 2376 | ||
2287 | level = mapping_level(vcpu, gfn); | 2377 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2288 | 2378 | if (likely(!force_pt_level)) { | |
2289 | /* | 2379 | level = mapping_level(vcpu, gfn); |
2290 | * This path builds a PAE pagetable - so we can map 2mb pages at | 2380 | /* |
2291 | * maximum. Therefore check if the level is larger than that. | 2381 | * This path builds a PAE pagetable - so we can map |
2292 | */ | 2382 | * 2mb pages at maximum. Therefore check if the level |
2293 | if (level > PT_DIRECTORY_LEVEL) | 2383 | * is larger than that. |
2294 | level = PT_DIRECTORY_LEVEL; | 2384 | */ |
2385 | if (level > PT_DIRECTORY_LEVEL) | ||
2386 | level = PT_DIRECTORY_LEVEL; | ||
2295 | 2387 | ||
2296 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2388 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2389 | } else | ||
2390 | level = PT_PAGE_TABLE_LEVEL; | ||
2297 | 2391 | ||
2298 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2392 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2299 | smp_rmb(); | 2393 | smp_rmb(); |
2300 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2394 | |
2395 | if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) | ||
2396 | return 0; | ||
2301 | 2397 | ||
2302 | /* mmio */ | 2398 | /* mmio */ |
2303 | if (is_error_pfn(pfn)) | 2399 | if (is_error_pfn(pfn)) |
@@ -2307,7 +2403,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
2307 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2403 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2308 | goto out_unlock; | 2404 | goto out_unlock; |
2309 | kvm_mmu_free_some_pages(vcpu); | 2405 | kvm_mmu_free_some_pages(vcpu); |
2310 | r = __direct_map(vcpu, v, write, level, gfn, pfn); | 2406 | if (likely(!force_pt_level)) |
2407 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | ||
2408 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, | ||
2409 | prefault); | ||
2311 | spin_unlock(&vcpu->kvm->mmu_lock); | 2410 | spin_unlock(&vcpu->kvm->mmu_lock); |
2312 | 2411 | ||
2313 | 2412 | ||
@@ -2394,7 +2493,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
2394 | ASSERT(!VALID_PAGE(root)); | 2493 | ASSERT(!VALID_PAGE(root)); |
2395 | spin_lock(&vcpu->kvm->mmu_lock); | 2494 | spin_lock(&vcpu->kvm->mmu_lock); |
2396 | kvm_mmu_free_some_pages(vcpu); | 2495 | kvm_mmu_free_some_pages(vcpu); |
2397 | sp = kvm_mmu_get_page(vcpu, i << 30, i << 30, | 2496 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
2497 | i << 30, | ||
2398 | PT32_ROOT_LEVEL, 1, ACC_ALL, | 2498 | PT32_ROOT_LEVEL, 1, ACC_ALL, |
2399 | NULL); | 2499 | NULL); |
2400 | root = __pa(sp->spt); | 2500 | root = __pa(sp->spt); |
@@ -2529,6 +2629,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2529 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2629 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2530 | sp = page_header(root); | 2630 | sp = page_header(root); |
2531 | mmu_sync_children(vcpu, sp); | 2631 | mmu_sync_children(vcpu, sp); |
2632 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | ||
2532 | return; | 2633 | return; |
2533 | } | 2634 | } |
2534 | for (i = 0; i < 4; ++i) { | 2635 | for (i = 0; i < 4; ++i) { |
@@ -2551,23 +2652,24 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2551 | } | 2652 | } |
2552 | 2653 | ||
2553 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | 2654 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
2554 | u32 access, u32 *error) | 2655 | u32 access, struct x86_exception *exception) |
2555 | { | 2656 | { |
2556 | if (error) | 2657 | if (exception) |
2557 | *error = 0; | 2658 | exception->error_code = 0; |
2558 | return vaddr; | 2659 | return vaddr; |
2559 | } | 2660 | } |
2560 | 2661 | ||
2561 | static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, | 2662 | static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, |
2562 | u32 access, u32 *error) | 2663 | u32 access, |
2664 | struct x86_exception *exception) | ||
2563 | { | 2665 | { |
2564 | if (error) | 2666 | if (exception) |
2565 | *error = 0; | 2667 | exception->error_code = 0; |
2566 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); | 2668 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); |
2567 | } | 2669 | } |
2568 | 2670 | ||
2569 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 2671 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
2570 | u32 error_code) | 2672 | u32 error_code, bool prefault) |
2571 | { | 2673 | { |
2572 | gfn_t gfn; | 2674 | gfn_t gfn; |
2573 | int r; | 2675 | int r; |
@@ -2583,17 +2685,68 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
2583 | gfn = gva >> PAGE_SHIFT; | 2685 | gfn = gva >> PAGE_SHIFT; |
2584 | 2686 | ||
2585 | return nonpaging_map(vcpu, gva & PAGE_MASK, | 2687 | return nonpaging_map(vcpu, gva & PAGE_MASK, |
2586 | error_code & PFERR_WRITE_MASK, gfn); | 2688 | error_code & PFERR_WRITE_MASK, gfn, prefault); |
2689 | } | ||
2690 | |||
2691 | static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | ||
2692 | { | ||
2693 | struct kvm_arch_async_pf arch; | ||
2694 | |||
2695 | arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; | ||
2696 | arch.gfn = gfn; | ||
2697 | arch.direct_map = vcpu->arch.mmu.direct_map; | ||
2698 | arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); | ||
2699 | |||
2700 | return kvm_setup_async_pf(vcpu, gva, gfn, &arch); | ||
2701 | } | ||
2702 | |||
2703 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) | ||
2704 | { | ||
2705 | if (unlikely(!irqchip_in_kernel(vcpu->kvm) || | ||
2706 | kvm_event_needs_reinjection(vcpu))) | ||
2707 | return false; | ||
2708 | |||
2709 | return kvm_x86_ops->interrupt_allowed(vcpu); | ||
2587 | } | 2710 | } |
2588 | 2711 | ||
2589 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | 2712 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2590 | u32 error_code) | 2713 | gva_t gva, pfn_t *pfn, bool write, bool *writable) |
2714 | { | ||
2715 | bool async; | ||
2716 | |||
2717 | *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable); | ||
2718 | |||
2719 | if (!async) | ||
2720 | return false; /* *pfn has correct page already */ | ||
2721 | |||
2722 | put_page(pfn_to_page(*pfn)); | ||
2723 | |||
2724 | if (!prefault && can_do_async_pf(vcpu)) { | ||
2725 | trace_kvm_try_async_get_page(gva, gfn); | ||
2726 | if (kvm_find_async_pf_gfn(vcpu, gfn)) { | ||
2727 | trace_kvm_async_pf_doublefault(gva, gfn); | ||
2728 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
2729 | return true; | ||
2730 | } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) | ||
2731 | return true; | ||
2732 | } | ||
2733 | |||
2734 | *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable); | ||
2735 | |||
2736 | return false; | ||
2737 | } | ||
2738 | |||
2739 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | ||
2740 | bool prefault) | ||
2591 | { | 2741 | { |
2592 | pfn_t pfn; | 2742 | pfn_t pfn; |
2593 | int r; | 2743 | int r; |
2594 | int level; | 2744 | int level; |
2745 | int force_pt_level; | ||
2595 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2746 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2596 | unsigned long mmu_seq; | 2747 | unsigned long mmu_seq; |
2748 | int write = error_code & PFERR_WRITE_MASK; | ||
2749 | bool map_writable; | ||
2597 | 2750 | ||
2598 | ASSERT(vcpu); | 2751 | ASSERT(vcpu); |
2599 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2752 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
@@ -2602,21 +2755,30 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
2602 | if (r) | 2755 | if (r) |
2603 | return r; | 2756 | return r; |
2604 | 2757 | ||
2605 | level = mapping_level(vcpu, gfn); | 2758 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2606 | 2759 | if (likely(!force_pt_level)) { | |
2607 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2760 | level = mapping_level(vcpu, gfn); |
2761 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | ||
2762 | } else | ||
2763 | level = PT_PAGE_TABLE_LEVEL; | ||
2608 | 2764 | ||
2609 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2765 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2610 | smp_rmb(); | 2766 | smp_rmb(); |
2611 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2767 | |
2768 | if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) | ||
2769 | return 0; | ||
2770 | |||
2771 | /* mmio */ | ||
2612 | if (is_error_pfn(pfn)) | 2772 | if (is_error_pfn(pfn)) |
2613 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); | 2773 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
2614 | spin_lock(&vcpu->kvm->mmu_lock); | 2774 | spin_lock(&vcpu->kvm->mmu_lock); |
2615 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2775 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2616 | goto out_unlock; | 2776 | goto out_unlock; |
2617 | kvm_mmu_free_some_pages(vcpu); | 2777 | kvm_mmu_free_some_pages(vcpu); |
2618 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 2778 | if (likely(!force_pt_level)) |
2619 | level, gfn, pfn); | 2779 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
2780 | r = __direct_map(vcpu, gpa, write, map_writable, | ||
2781 | level, gfn, pfn, prefault); | ||
2620 | spin_unlock(&vcpu->kvm->mmu_lock); | 2782 | spin_unlock(&vcpu->kvm->mmu_lock); |
2621 | 2783 | ||
2622 | return r; | 2784 | return r; |
@@ -2658,18 +2820,19 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
2658 | 2820 | ||
2659 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 2821 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
2660 | { | 2822 | { |
2661 | pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); | 2823 | pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu)); |
2662 | mmu_free_roots(vcpu); | 2824 | mmu_free_roots(vcpu); |
2663 | } | 2825 | } |
2664 | 2826 | ||
2665 | static unsigned long get_cr3(struct kvm_vcpu *vcpu) | 2827 | static unsigned long get_cr3(struct kvm_vcpu *vcpu) |
2666 | { | 2828 | { |
2667 | return vcpu->arch.cr3; | 2829 | return kvm_read_cr3(vcpu); |
2668 | } | 2830 | } |
2669 | 2831 | ||
2670 | static void inject_page_fault(struct kvm_vcpu *vcpu) | 2832 | static void inject_page_fault(struct kvm_vcpu *vcpu, |
2833 | struct x86_exception *fault) | ||
2671 | { | 2834 | { |
2672 | vcpu->arch.mmu.inject_page_fault(vcpu); | 2835 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); |
2673 | } | 2836 | } |
2674 | 2837 | ||
2675 | static void paging_free(struct kvm_vcpu *vcpu) | 2838 | static void paging_free(struct kvm_vcpu *vcpu) |
@@ -2815,6 +2978,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2815 | { | 2978 | { |
2816 | struct kvm_mmu *context = vcpu->arch.walk_mmu; | 2979 | struct kvm_mmu *context = vcpu->arch.walk_mmu; |
2817 | 2980 | ||
2981 | context->base_role.word = 0; | ||
2818 | context->new_cr3 = nonpaging_new_cr3; | 2982 | context->new_cr3 = nonpaging_new_cr3; |
2819 | context->page_fault = tdp_page_fault; | 2983 | context->page_fault = tdp_page_fault; |
2820 | context->free = nonpaging_free; | 2984 | context->free = nonpaging_free; |
@@ -3007,9 +3171,6 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3007 | return; | 3171 | return; |
3008 | } | 3172 | } |
3009 | 3173 | ||
3010 | if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) | ||
3011 | return; | ||
3012 | |||
3013 | ++vcpu->kvm->stat.mmu_pte_updated; | 3174 | ++vcpu->kvm->stat.mmu_pte_updated; |
3014 | if (!sp->role.cr4_pae) | 3175 | if (!sp->role.cr4_pae) |
3015 | paging32_update_pte(vcpu, sp, spte, new); | 3176 | paging32_update_pte(vcpu, sp, spte, new); |
@@ -3263,12 +3424,13 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
3263 | } | 3424 | } |
3264 | } | 3425 | } |
3265 | 3426 | ||
3266 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 3427 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
3428 | void *insn, int insn_len) | ||
3267 | { | 3429 | { |
3268 | int r; | 3430 | int r; |
3269 | enum emulation_result er; | 3431 | enum emulation_result er; |
3270 | 3432 | ||
3271 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); | 3433 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); |
3272 | if (r < 0) | 3434 | if (r < 0) |
3273 | goto out; | 3435 | goto out; |
3274 | 3436 | ||
@@ -3281,7 +3443,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
3281 | if (r) | 3443 | if (r) |
3282 | goto out; | 3444 | goto out; |
3283 | 3445 | ||
3284 | er = emulate_instruction(vcpu, cr2, error_code, 0); | 3446 | er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); |
3285 | 3447 | ||
3286 | switch (er) { | 3448 | switch (er) { |
3287 | case EMULATE_DONE: | 3449 | case EMULATE_DONE: |
@@ -3376,11 +3538,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3376 | if (!test_bit(slot, sp->slot_bitmap)) | 3538 | if (!test_bit(slot, sp->slot_bitmap)) |
3377 | continue; | 3539 | continue; |
3378 | 3540 | ||
3541 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | ||
3542 | continue; | ||
3543 | |||
3379 | pt = sp->spt; | 3544 | pt = sp->spt; |
3380 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3545 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
3381 | /* avoid RMW */ | 3546 | /* avoid RMW */ |
3382 | if (is_writable_pte(pt[i])) | 3547 | if (is_writable_pte(pt[i])) |
3383 | pt[i] &= ~PT_WRITABLE_MASK; | 3548 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); |
3384 | } | 3549 | } |
3385 | kvm_flush_remote_tlbs(kvm); | 3550 | kvm_flush_remote_tlbs(kvm); |
3386 | } | 3551 | } |
@@ -3462,13 +3627,6 @@ static void mmu_destroy_caches(void) | |||
3462 | kmem_cache_destroy(mmu_page_header_cache); | 3627 | kmem_cache_destroy(mmu_page_header_cache); |
3463 | } | 3628 | } |
3464 | 3629 | ||
3465 | void kvm_mmu_module_exit(void) | ||
3466 | { | ||
3467 | mmu_destroy_caches(); | ||
3468 | percpu_counter_destroy(&kvm_total_used_mmu_pages); | ||
3469 | unregister_shrinker(&mmu_shrinker); | ||
3470 | } | ||
3471 | |||
3472 | int kvm_mmu_module_init(void) | 3630 | int kvm_mmu_module_init(void) |
3473 | { | 3631 | { |
3474 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | 3632 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", |
@@ -3565,7 +3723,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | |||
3565 | 3723 | ||
3566 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 3724 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
3567 | { | 3725 | { |
3568 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); | 3726 | (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu)); |
3569 | return 1; | 3727 | return 1; |
3570 | } | 3728 | } |
3571 | 3729 | ||
@@ -3661,12 +3819,6 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | |||
3661 | } | 3819 | } |
3662 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); | 3820 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); |
3663 | 3821 | ||
3664 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
3665 | #include "mmu_audit.c" | ||
3666 | #else | ||
3667 | static void mmu_audit_disable(void) { } | ||
3668 | #endif | ||
3669 | |||
3670 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | 3822 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) |
3671 | { | 3823 | { |
3672 | ASSERT(vcpu); | 3824 | ASSERT(vcpu); |
@@ -3674,5 +3826,18 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | |||
3674 | destroy_kvm_mmu(vcpu); | 3826 | destroy_kvm_mmu(vcpu); |
3675 | free_mmu_pages(vcpu); | 3827 | free_mmu_pages(vcpu); |
3676 | mmu_free_memory_caches(vcpu); | 3828 | mmu_free_memory_caches(vcpu); |
3829 | } | ||
3830 | |||
3831 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
3832 | #include "mmu_audit.c" | ||
3833 | #else | ||
3834 | static void mmu_audit_disable(void) { } | ||
3835 | #endif | ||
3836 | |||
3837 | void kvm_mmu_module_exit(void) | ||
3838 | { | ||
3839 | mmu_destroy_caches(); | ||
3840 | percpu_counter_destroy(&kvm_total_used_mmu_pages); | ||
3841 | unregister_shrinker(&mmu_shrinker); | ||
3677 | mmu_audit_disable(); | 3842 | mmu_audit_disable(); |
3678 | } | 3843 | } |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ba2bcdde6221..5f6223b8bcf7 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -19,11 +19,9 @@ | |||
19 | 19 | ||
20 | #include <linux/ratelimit.h> | 20 | #include <linux/ratelimit.h> |
21 | 21 | ||
22 | static int audit_point; | 22 | #define audit_printk(kvm, fmt, args...) \ |
23 | |||
24 | #define audit_printk(fmt, args...) \ | ||
25 | printk(KERN_ERR "audit: (%s) error: " \ | 23 | printk(KERN_ERR "audit: (%s) error: " \ |
26 | fmt, audit_point_name[audit_point], ##args) | 24 | fmt, audit_point_name[kvm->arch.audit_point], ##args) |
27 | 25 | ||
28 | typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level); | 26 | typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level); |
29 | 27 | ||
@@ -97,18 +95,21 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
97 | 95 | ||
98 | if (sp->unsync) { | 96 | if (sp->unsync) { |
99 | if (level != PT_PAGE_TABLE_LEVEL) { | 97 | if (level != PT_PAGE_TABLE_LEVEL) { |
100 | audit_printk("unsync sp: %p level = %d\n", sp, level); | 98 | audit_printk(vcpu->kvm, "unsync sp: %p " |
99 | "level = %d\n", sp, level); | ||
101 | return; | 100 | return; |
102 | } | 101 | } |
103 | 102 | ||
104 | if (*sptep == shadow_notrap_nonpresent_pte) { | 103 | if (*sptep == shadow_notrap_nonpresent_pte) { |
105 | audit_printk("notrap spte in unsync sp: %p\n", sp); | 104 | audit_printk(vcpu->kvm, "notrap spte in unsync " |
105 | "sp: %p\n", sp); | ||
106 | return; | 106 | return; |
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
110 | if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) { | 110 | if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) { |
111 | audit_printk("notrap spte in direct sp: %p\n", sp); | 111 | audit_printk(vcpu->kvm, "notrap spte in direct sp: %p\n", |
112 | sp); | ||
112 | return; | 113 | return; |
113 | } | 114 | } |
114 | 115 | ||
@@ -125,8 +126,9 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
125 | 126 | ||
126 | hpa = pfn << PAGE_SHIFT; | 127 | hpa = pfn << PAGE_SHIFT; |
127 | if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) | 128 | if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) |
128 | audit_printk("levels %d pfn %llx hpa %llx ent %llxn", | 129 | audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx " |
129 | vcpu->arch.mmu.root_level, pfn, hpa, *sptep); | 130 | "ent %llxn", vcpu->arch.mmu.root_level, pfn, |
131 | hpa, *sptep); | ||
130 | } | 132 | } |
131 | 133 | ||
132 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | 134 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
@@ -142,8 +144,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
142 | if (!gfn_to_memslot(kvm, gfn)) { | 144 | if (!gfn_to_memslot(kvm, gfn)) { |
143 | if (!printk_ratelimit()) | 145 | if (!printk_ratelimit()) |
144 | return; | 146 | return; |
145 | audit_printk("no memslot for gfn %llx\n", gfn); | 147 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); |
146 | audit_printk("index %ld of sp (gfn=%llx)\n", | 148 | audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", |
147 | (long int)(sptep - rev_sp->spt), rev_sp->gfn); | 149 | (long int)(sptep - rev_sp->spt), rev_sp->gfn); |
148 | dump_stack(); | 150 | dump_stack(); |
149 | return; | 151 | return; |
@@ -153,7 +155,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
153 | if (!*rmapp) { | 155 | if (!*rmapp) { |
154 | if (!printk_ratelimit()) | 156 | if (!printk_ratelimit()) |
155 | return; | 157 | return; |
156 | audit_printk("no rmap for writable spte %llx\n", *sptep); | 158 | audit_printk(kvm, "no rmap for writable spte %llx\n", |
159 | *sptep); | ||
157 | dump_stack(); | 160 | dump_stack(); |
158 | } | 161 | } |
159 | } | 162 | } |
@@ -168,8 +171,9 @@ static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
168 | { | 171 | { |
169 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | 172 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
170 | 173 | ||
171 | if (audit_point == AUDIT_POST_SYNC && sp->unsync) | 174 | if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync) |
172 | audit_printk("meet unsync sp(%p) after sync root.\n", sp); | 175 | audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync " |
176 | "root.\n", sp); | ||
173 | } | 177 | } |
174 | 178 | ||
175 | static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) | 179 | static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) |
@@ -202,8 +206,9 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
202 | spte = rmap_next(kvm, rmapp, NULL); | 206 | spte = rmap_next(kvm, rmapp, NULL); |
203 | while (spte) { | 207 | while (spte) { |
204 | if (is_writable_pte(*spte)) | 208 | if (is_writable_pte(*spte)) |
205 | audit_printk("shadow page has writable mappings: gfn " | 209 | audit_printk(kvm, "shadow page has writable " |
206 | "%llx role %x\n", sp->gfn, sp->role.word); | 210 | "mappings: gfn %llx role %x\n", |
211 | sp->gfn, sp->role.word); | ||
207 | spte = rmap_next(kvm, rmapp, spte); | 212 | spte = rmap_next(kvm, rmapp, spte); |
208 | } | 213 | } |
209 | } | 214 | } |
@@ -238,7 +243,7 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | |||
238 | if (!__ratelimit(&ratelimit_state)) | 243 | if (!__ratelimit(&ratelimit_state)) |
239 | return; | 244 | return; |
240 | 245 | ||
241 | audit_point = point; | 246 | vcpu->kvm->arch.audit_point = point; |
242 | audit_all_active_sps(vcpu->kvm); | 247 | audit_all_active_sps(vcpu->kvm); |
243 | audit_vcpu_spte(vcpu); | 248 | audit_vcpu_spte(vcpu); |
244 | } | 249 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index cd7a833a3b52..6bccc24c4181 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -72,7 +72,7 @@ struct guest_walker { | |||
72 | unsigned pt_access; | 72 | unsigned pt_access; |
73 | unsigned pte_access; | 73 | unsigned pte_access; |
74 | gfn_t gfn; | 74 | gfn_t gfn; |
75 | u32 error_code; | 75 | struct x86_exception fault; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | 78 | static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) |
@@ -266,21 +266,23 @@ walk: | |||
266 | return 1; | 266 | return 1; |
267 | 267 | ||
268 | error: | 268 | error: |
269 | walker->error_code = 0; | 269 | walker->fault.vector = PF_VECTOR; |
270 | walker->fault.error_code_valid = true; | ||
271 | walker->fault.error_code = 0; | ||
270 | if (present) | 272 | if (present) |
271 | walker->error_code |= PFERR_PRESENT_MASK; | 273 | walker->fault.error_code |= PFERR_PRESENT_MASK; |
272 | 274 | ||
273 | walker->error_code |= write_fault | user_fault; | 275 | walker->fault.error_code |= write_fault | user_fault; |
274 | 276 | ||
275 | if (fetch_fault && mmu->nx) | 277 | if (fetch_fault && mmu->nx) |
276 | walker->error_code |= PFERR_FETCH_MASK; | 278 | walker->fault.error_code |= PFERR_FETCH_MASK; |
277 | if (rsvd_fault) | 279 | if (rsvd_fault) |
278 | walker->error_code |= PFERR_RSVD_MASK; | 280 | walker->fault.error_code |= PFERR_RSVD_MASK; |
279 | 281 | ||
280 | vcpu->arch.fault.address = addr; | 282 | walker->fault.address = addr; |
281 | vcpu->arch.fault.error_code = walker->error_code; | 283 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; |
282 | 284 | ||
283 | trace_kvm_mmu_walker_error(walker->error_code); | 285 | trace_kvm_mmu_walker_error(walker->fault.error_code); |
284 | return 0; | 286 | return 0; |
285 | } | 287 | } |
286 | 288 | ||
@@ -299,25 +301,42 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker, | |||
299 | addr, access); | 301 | addr, access); |
300 | } | 302 | } |
301 | 303 | ||
304 | static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | ||
305 | struct kvm_mmu_page *sp, u64 *spte, | ||
306 | pt_element_t gpte) | ||
307 | { | ||
308 | u64 nonpresent = shadow_trap_nonpresent_pte; | ||
309 | |||
310 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
311 | goto no_present; | ||
312 | |||
313 | if (!is_present_gpte(gpte)) { | ||
314 | if (!sp->unsync) | ||
315 | nonpresent = shadow_notrap_nonpresent_pte; | ||
316 | goto no_present; | ||
317 | } | ||
318 | |||
319 | if (!(gpte & PT_ACCESSED_MASK)) | ||
320 | goto no_present; | ||
321 | |||
322 | return false; | ||
323 | |||
324 | no_present: | ||
325 | drop_spte(vcpu->kvm, spte, nonpresent); | ||
326 | return true; | ||
327 | } | ||
328 | |||
302 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 329 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
303 | u64 *spte, const void *pte) | 330 | u64 *spte, const void *pte) |
304 | { | 331 | { |
305 | pt_element_t gpte; | 332 | pt_element_t gpte; |
306 | unsigned pte_access; | 333 | unsigned pte_access; |
307 | pfn_t pfn; | 334 | pfn_t pfn; |
308 | u64 new_spte; | ||
309 | 335 | ||
310 | gpte = *(const pt_element_t *)pte; | 336 | gpte = *(const pt_element_t *)pte; |
311 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 337 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
312 | if (!is_present_gpte(gpte)) { | ||
313 | if (sp->unsync) | ||
314 | new_spte = shadow_trap_nonpresent_pte; | ||
315 | else | ||
316 | new_spte = shadow_notrap_nonpresent_pte; | ||
317 | __set_spte(spte, new_spte); | ||
318 | } | ||
319 | return; | 338 | return; |
320 | } | 339 | |
321 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 340 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
322 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 341 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
323 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 342 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
@@ -329,7 +348,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
329 | return; | 348 | return; |
330 | kvm_get_pfn(pfn); | 349 | kvm_get_pfn(pfn); |
331 | /* | 350 | /* |
332 | * we call mmu_set_spte() with reset_host_protection = true beacuse that | 351 | * we call mmu_set_spte() with host_writable = true beacuse that |
333 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 352 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
334 | */ | 353 | */ |
335 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 354 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
@@ -364,7 +383,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
364 | u64 *sptep) | 383 | u64 *sptep) |
365 | { | 384 | { |
366 | struct kvm_mmu_page *sp; | 385 | struct kvm_mmu_page *sp; |
367 | struct kvm_mmu *mmu = &vcpu->arch.mmu; | ||
368 | pt_element_t *gptep = gw->prefetch_ptes; | 386 | pt_element_t *gptep = gw->prefetch_ptes; |
369 | u64 *spte; | 387 | u64 *spte; |
370 | int i; | 388 | int i; |
@@ -395,14 +413,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
395 | 413 | ||
396 | gpte = gptep[i]; | 414 | gpte = gptep[i]; |
397 | 415 | ||
398 | if (!is_present_gpte(gpte) || | 416 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
399 | is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) { | ||
400 | if (!sp->unsync) | ||
401 | __set_spte(spte, shadow_notrap_nonpresent_pte); | ||
402 | continue; | ||
403 | } | ||
404 | |||
405 | if (!(gpte & PT_ACCESSED_MASK)) | ||
406 | continue; | 417 | continue; |
407 | 418 | ||
408 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 419 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
@@ -427,7 +438,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
427 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 438 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
428 | struct guest_walker *gw, | 439 | struct guest_walker *gw, |
429 | int user_fault, int write_fault, int hlevel, | 440 | int user_fault, int write_fault, int hlevel, |
430 | int *ptwrite, pfn_t pfn) | 441 | int *ptwrite, pfn_t pfn, bool map_writable, |
442 | bool prefault) | ||
431 | { | 443 | { |
432 | unsigned access = gw->pt_access; | 444 | unsigned access = gw->pt_access; |
433 | struct kvm_mmu_page *sp = NULL; | 445 | struct kvm_mmu_page *sp = NULL; |
@@ -501,7 +513,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
501 | 513 | ||
502 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | 514 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, |
503 | user_fault, write_fault, dirty, ptwrite, it.level, | 515 | user_fault, write_fault, dirty, ptwrite, it.level, |
504 | gw->gfn, pfn, false, true); | 516 | gw->gfn, pfn, prefault, map_writable); |
505 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); | 517 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); |
506 | 518 | ||
507 | return it.sptep; | 519 | return it.sptep; |
@@ -527,8 +539,8 @@ out_gpte_changed: | |||
527 | * Returns: 1 if we need to emulate the instruction, 0 otherwise, or | 539 | * Returns: 1 if we need to emulate the instruction, 0 otherwise, or |
528 | * a negative value on error. | 540 | * a negative value on error. |
529 | */ | 541 | */ |
530 | static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | 542 | static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, |
531 | u32 error_code) | 543 | bool prefault) |
532 | { | 544 | { |
533 | int write_fault = error_code & PFERR_WRITE_MASK; | 545 | int write_fault = error_code & PFERR_WRITE_MASK; |
534 | int user_fault = error_code & PFERR_USER_MASK; | 546 | int user_fault = error_code & PFERR_USER_MASK; |
@@ -538,7 +550,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
538 | int r; | 550 | int r; |
539 | pfn_t pfn; | 551 | pfn_t pfn; |
540 | int level = PT_PAGE_TABLE_LEVEL; | 552 | int level = PT_PAGE_TABLE_LEVEL; |
553 | int force_pt_level; | ||
541 | unsigned long mmu_seq; | 554 | unsigned long mmu_seq; |
555 | bool map_writable; | ||
542 | 556 | ||
543 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 557 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
544 | 558 | ||
@@ -556,19 +570,29 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
556 | */ | 570 | */ |
557 | if (!r) { | 571 | if (!r) { |
558 | pgprintk("%s: guest page fault\n", __func__); | 572 | pgprintk("%s: guest page fault\n", __func__); |
559 | inject_page_fault(vcpu); | 573 | if (!prefault) { |
560 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 574 | inject_page_fault(vcpu, &walker.fault); |
575 | /* reset fork detector */ | ||
576 | vcpu->arch.last_pt_write_count = 0; | ||
577 | } | ||
561 | return 0; | 578 | return 0; |
562 | } | 579 | } |
563 | 580 | ||
564 | if (walker.level >= PT_DIRECTORY_LEVEL) { | 581 | if (walker.level >= PT_DIRECTORY_LEVEL) |
582 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); | ||
583 | else | ||
584 | force_pt_level = 1; | ||
585 | if (!force_pt_level) { | ||
565 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); | 586 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); |
566 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); | 587 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); |
567 | } | 588 | } |
568 | 589 | ||
569 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 590 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
570 | smp_rmb(); | 591 | smp_rmb(); |
571 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 592 | |
593 | if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault, | ||
594 | &map_writable)) | ||
595 | return 0; | ||
572 | 596 | ||
573 | /* mmio */ | 597 | /* mmio */ |
574 | if (is_error_pfn(pfn)) | 598 | if (is_error_pfn(pfn)) |
@@ -580,8 +604,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
580 | 604 | ||
581 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 605 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
582 | kvm_mmu_free_some_pages(vcpu); | 606 | kvm_mmu_free_some_pages(vcpu); |
607 | if (!force_pt_level) | ||
608 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | ||
583 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 609 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
584 | level, &write_pt, pfn); | 610 | level, &write_pt, pfn, map_writable, prefault); |
585 | (void)sptep; | 611 | (void)sptep; |
586 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 612 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, |
587 | sptep, *sptep, write_pt); | 613 | sptep, *sptep, write_pt); |
@@ -661,7 +687,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
661 | } | 687 | } |
662 | 688 | ||
663 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 689 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
664 | u32 *error) | 690 | struct x86_exception *exception) |
665 | { | 691 | { |
666 | struct guest_walker walker; | 692 | struct guest_walker walker; |
667 | gpa_t gpa = UNMAPPED_GVA; | 693 | gpa_t gpa = UNMAPPED_GVA; |
@@ -672,14 +698,15 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | |||
672 | if (r) { | 698 | if (r) { |
673 | gpa = gfn_to_gpa(walker.gfn); | 699 | gpa = gfn_to_gpa(walker.gfn); |
674 | gpa |= vaddr & ~PAGE_MASK; | 700 | gpa |= vaddr & ~PAGE_MASK; |
675 | } else if (error) | 701 | } else if (exception) |
676 | *error = walker.error_code; | 702 | *exception = walker.fault; |
677 | 703 | ||
678 | return gpa; | 704 | return gpa; |
679 | } | 705 | } |
680 | 706 | ||
681 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | 707 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, |
682 | u32 access, u32 *error) | 708 | u32 access, |
709 | struct x86_exception *exception) | ||
683 | { | 710 | { |
684 | struct guest_walker walker; | 711 | struct guest_walker walker; |
685 | gpa_t gpa = UNMAPPED_GVA; | 712 | gpa_t gpa = UNMAPPED_GVA; |
@@ -690,8 +717,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
690 | if (r) { | 717 | if (r) { |
691 | gpa = gfn_to_gpa(walker.gfn); | 718 | gpa = gfn_to_gpa(walker.gfn); |
692 | gpa |= vaddr & ~PAGE_MASK; | 719 | gpa |= vaddr & ~PAGE_MASK; |
693 | } else if (error) | 720 | } else if (exception) |
694 | *error = walker.error_code; | 721 | *exception = walker.fault; |
695 | 722 | ||
696 | return gpa; | 723 | return gpa; |
697 | } | 724 | } |
@@ -730,12 +757,19 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
730 | * Using the cached information from sp->gfns is safe because: | 757 | * Using the cached information from sp->gfns is safe because: |
731 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 758 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
732 | * can't change unless all sptes pointing to it are nuked first. | 759 | * can't change unless all sptes pointing to it are nuked first. |
760 | * | ||
761 | * Note: | ||
762 | * We should flush all tlbs if spte is dropped even though guest is | ||
763 | * responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page | ||
764 | * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't | ||
765 | * used by guest then tlbs are not flushed, so guest is allowed to access the | ||
766 | * freed pages. | ||
767 | * And we increase kvm->tlbs_dirty to delay tlbs flush in this case. | ||
733 | */ | 768 | */ |
734 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 769 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
735 | bool clear_unsync) | ||
736 | { | 770 | { |
737 | int i, offset, nr_present; | 771 | int i, offset, nr_present; |
738 | bool reset_host_protection; | 772 | bool host_writable; |
739 | gpa_t first_pte_gpa; | 773 | gpa_t first_pte_gpa; |
740 | 774 | ||
741 | offset = nr_present = 0; | 775 | offset = nr_present = 0; |
@@ -764,31 +798,27 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
764 | return -EINVAL; | 798 | return -EINVAL; |
765 | 799 | ||
766 | gfn = gpte_to_gfn(gpte); | 800 | gfn = gpte_to_gfn(gpte); |
767 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL) | ||
768 | || gfn != sp->gfns[i] || !is_present_gpte(gpte) | ||
769 | || !(gpte & PT_ACCESSED_MASK)) { | ||
770 | u64 nonpresent; | ||
771 | 801 | ||
772 | if (is_present_gpte(gpte) || !clear_unsync) | 802 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
773 | nonpresent = shadow_trap_nonpresent_pte; | 803 | vcpu->kvm->tlbs_dirty++; |
774 | else | 804 | continue; |
775 | nonpresent = shadow_notrap_nonpresent_pte; | 805 | } |
776 | drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); | 806 | |
807 | if (gfn != sp->gfns[i]) { | ||
808 | drop_spte(vcpu->kvm, &sp->spt[i], | ||
809 | shadow_trap_nonpresent_pte); | ||
810 | vcpu->kvm->tlbs_dirty++; | ||
777 | continue; | 811 | continue; |
778 | } | 812 | } |
779 | 813 | ||
780 | nr_present++; | 814 | nr_present++; |
781 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 815 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
782 | if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { | 816 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; |
783 | pte_access &= ~ACC_WRITE_MASK; | 817 | |
784 | reset_host_protection = 0; | ||
785 | } else { | ||
786 | reset_host_protection = 1; | ||
787 | } | ||
788 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 818 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
789 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, | 819 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, |
790 | spte_to_pfn(sp->spt[i]), true, false, | 820 | spte_to_pfn(sp->spt[i]), true, false, |
791 | reset_host_protection); | 821 | host_writable); |
792 | } | 822 | } |
793 | 823 | ||
794 | return !nr_present; | 824 | return !nr_present; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 82e144a4e514..25bd1bc5aad2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -31,6 +31,7 @@ | |||
31 | 31 | ||
32 | #include <asm/tlbflush.h> | 32 | #include <asm/tlbflush.h> |
33 | #include <asm/desc.h> | 33 | #include <asm/desc.h> |
34 | #include <asm/kvm_para.h> | ||
34 | 35 | ||
35 | #include <asm/virtext.h> | 36 | #include <asm/virtext.h> |
36 | #include "trace.h" | 37 | #include "trace.h" |
@@ -50,6 +51,10 @@ MODULE_LICENSE("GPL"); | |||
50 | #define SVM_FEATURE_LBRV (1 << 1) | 51 | #define SVM_FEATURE_LBRV (1 << 1) |
51 | #define SVM_FEATURE_SVML (1 << 2) | 52 | #define SVM_FEATURE_SVML (1 << 2) |
52 | #define SVM_FEATURE_NRIP (1 << 3) | 53 | #define SVM_FEATURE_NRIP (1 << 3) |
54 | #define SVM_FEATURE_TSC_RATE (1 << 4) | ||
55 | #define SVM_FEATURE_VMCB_CLEAN (1 << 5) | ||
56 | #define SVM_FEATURE_FLUSH_ASID (1 << 6) | ||
57 | #define SVM_FEATURE_DECODE_ASSIST (1 << 7) | ||
53 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 58 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) |
54 | 59 | ||
55 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 60 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
@@ -97,10 +102,8 @@ struct nested_state { | |||
97 | unsigned long vmexit_rax; | 102 | unsigned long vmexit_rax; |
98 | 103 | ||
99 | /* cache for intercepts of the guest */ | 104 | /* cache for intercepts of the guest */ |
100 | u16 intercept_cr_read; | 105 | u32 intercept_cr; |
101 | u16 intercept_cr_write; | 106 | u32 intercept_dr; |
102 | u16 intercept_dr_read; | ||
103 | u16 intercept_dr_write; | ||
104 | u32 intercept_exceptions; | 107 | u32 intercept_exceptions; |
105 | u64 intercept; | 108 | u64 intercept; |
106 | 109 | ||
@@ -123,7 +126,12 @@ struct vcpu_svm { | |||
123 | u64 next_rip; | 126 | u64 next_rip; |
124 | 127 | ||
125 | u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; | 128 | u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; |
126 | u64 host_gs_base; | 129 | struct { |
130 | u16 fs; | ||
131 | u16 gs; | ||
132 | u16 ldt; | ||
133 | u64 gs_base; | ||
134 | } host; | ||
127 | 135 | ||
128 | u32 *msrpm; | 136 | u32 *msrpm; |
129 | 137 | ||
@@ -133,6 +141,7 @@ struct vcpu_svm { | |||
133 | 141 | ||
134 | unsigned int3_injected; | 142 | unsigned int3_injected; |
135 | unsigned long int3_rip; | 143 | unsigned long int3_rip; |
144 | u32 apf_reason; | ||
136 | }; | 145 | }; |
137 | 146 | ||
138 | #define MSR_INVALID 0xffffffffU | 147 | #define MSR_INVALID 0xffffffffU |
@@ -180,14 +189,151 @@ static int nested_svm_vmexit(struct vcpu_svm *svm); | |||
180 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 189 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
181 | bool has_error_code, u32 error_code); | 190 | bool has_error_code, u32 error_code); |
182 | 191 | ||
192 | enum { | ||
193 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, | ||
194 | pause filter count */ | ||
195 | VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ | ||
196 | VMCB_ASID, /* ASID */ | ||
197 | VMCB_INTR, /* int_ctl, int_vector */ | ||
198 | VMCB_NPT, /* npt_en, nCR3, gPAT */ | ||
199 | VMCB_CR, /* CR0, CR3, CR4, EFER */ | ||
200 | VMCB_DR, /* DR6, DR7 */ | ||
201 | VMCB_DT, /* GDT, IDT */ | ||
202 | VMCB_SEG, /* CS, DS, SS, ES, CPL */ | ||
203 | VMCB_CR2, /* CR2 only */ | ||
204 | VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ | ||
205 | VMCB_DIRTY_MAX, | ||
206 | }; | ||
207 | |||
208 | /* TPR and CR2 are always written before VMRUN */ | ||
209 | #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) | ||
210 | |||
211 | static inline void mark_all_dirty(struct vmcb *vmcb) | ||
212 | { | ||
213 | vmcb->control.clean = 0; | ||
214 | } | ||
215 | |||
216 | static inline void mark_all_clean(struct vmcb *vmcb) | ||
217 | { | ||
218 | vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) | ||
219 | & ~VMCB_ALWAYS_DIRTY_MASK; | ||
220 | } | ||
221 | |||
222 | static inline void mark_dirty(struct vmcb *vmcb, int bit) | ||
223 | { | ||
224 | vmcb->control.clean &= ~(1 << bit); | ||
225 | } | ||
226 | |||
183 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | 227 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) |
184 | { | 228 | { |
185 | return container_of(vcpu, struct vcpu_svm, vcpu); | 229 | return container_of(vcpu, struct vcpu_svm, vcpu); |
186 | } | 230 | } |
187 | 231 | ||
188 | static inline bool is_nested(struct vcpu_svm *svm) | 232 | static void recalc_intercepts(struct vcpu_svm *svm) |
233 | { | ||
234 | struct vmcb_control_area *c, *h; | ||
235 | struct nested_state *g; | ||
236 | |||
237 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
238 | |||
239 | if (!is_guest_mode(&svm->vcpu)) | ||
240 | return; | ||
241 | |||
242 | c = &svm->vmcb->control; | ||
243 | h = &svm->nested.hsave->control; | ||
244 | g = &svm->nested; | ||
245 | |||
246 | c->intercept_cr = h->intercept_cr | g->intercept_cr; | ||
247 | c->intercept_dr = h->intercept_dr | g->intercept_dr; | ||
248 | c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; | ||
249 | c->intercept = h->intercept | g->intercept; | ||
250 | } | ||
251 | |||
252 | static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) | ||
253 | { | ||
254 | if (is_guest_mode(&svm->vcpu)) | ||
255 | return svm->nested.hsave; | ||
256 | else | ||
257 | return svm->vmcb; | ||
258 | } | ||
259 | |||
260 | static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) | ||
261 | { | ||
262 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
263 | |||
264 | vmcb->control.intercept_cr |= (1U << bit); | ||
265 | |||
266 | recalc_intercepts(svm); | ||
267 | } | ||
268 | |||
269 | static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) | ||
270 | { | ||
271 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
272 | |||
273 | vmcb->control.intercept_cr &= ~(1U << bit); | ||
274 | |||
275 | recalc_intercepts(svm); | ||
276 | } | ||
277 | |||
278 | static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) | ||
279 | { | ||
280 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
281 | |||
282 | return vmcb->control.intercept_cr & (1U << bit); | ||
283 | } | ||
284 | |||
285 | static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) | ||
286 | { | ||
287 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
288 | |||
289 | vmcb->control.intercept_dr |= (1U << bit); | ||
290 | |||
291 | recalc_intercepts(svm); | ||
292 | } | ||
293 | |||
294 | static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) | ||
295 | { | ||
296 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
297 | |||
298 | vmcb->control.intercept_dr &= ~(1U << bit); | ||
299 | |||
300 | recalc_intercepts(svm); | ||
301 | } | ||
302 | |||
303 | static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) | ||
304 | { | ||
305 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
306 | |||
307 | vmcb->control.intercept_exceptions |= (1U << bit); | ||
308 | |||
309 | recalc_intercepts(svm); | ||
310 | } | ||
311 | |||
312 | static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) | ||
189 | { | 313 | { |
190 | return svm->nested.vmcb; | 314 | struct vmcb *vmcb = get_host_vmcb(svm); |
315 | |||
316 | vmcb->control.intercept_exceptions &= ~(1U << bit); | ||
317 | |||
318 | recalc_intercepts(svm); | ||
319 | } | ||
320 | |||
321 | static inline void set_intercept(struct vcpu_svm *svm, int bit) | ||
322 | { | ||
323 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
324 | |||
325 | vmcb->control.intercept |= (1ULL << bit); | ||
326 | |||
327 | recalc_intercepts(svm); | ||
328 | } | ||
329 | |||
330 | static inline void clr_intercept(struct vcpu_svm *svm, int bit) | ||
331 | { | ||
332 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
333 | |||
334 | vmcb->control.intercept &= ~(1ULL << bit); | ||
335 | |||
336 | recalc_intercepts(svm); | ||
191 | } | 337 | } |
192 | 338 | ||
193 | static inline void enable_gif(struct vcpu_svm *svm) | 339 | static inline void enable_gif(struct vcpu_svm *svm) |
@@ -264,11 +410,6 @@ static u32 svm_msrpm_offset(u32 msr) | |||
264 | 410 | ||
265 | #define MAX_INST_SIZE 15 | 411 | #define MAX_INST_SIZE 15 |
266 | 412 | ||
267 | static inline u32 svm_has(u32 feat) | ||
268 | { | ||
269 | return svm_features & feat; | ||
270 | } | ||
271 | |||
272 | static inline void clgi(void) | 413 | static inline void clgi(void) |
273 | { | 414 | { |
274 | asm volatile (__ex(SVM_CLGI)); | 415 | asm volatile (__ex(SVM_CLGI)); |
@@ -284,16 +425,6 @@ static inline void invlpga(unsigned long addr, u32 asid) | |||
284 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); | 425 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
285 | } | 426 | } |
286 | 427 | ||
287 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | ||
288 | { | ||
289 | to_svm(vcpu)->asid_generation--; | ||
290 | } | ||
291 | |||
292 | static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | ||
293 | { | ||
294 | force_new_asid(vcpu); | ||
295 | } | ||
296 | |||
297 | static int get_npt_level(void) | 428 | static int get_npt_level(void) |
298 | { | 429 | { |
299 | #ifdef CONFIG_X86_64 | 430 | #ifdef CONFIG_X86_64 |
@@ -310,6 +441,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
310 | efer &= ~EFER_LME; | 441 | efer &= ~EFER_LME; |
311 | 442 | ||
312 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 443 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
444 | mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); | ||
313 | } | 445 | } |
314 | 446 | ||
315 | static int is_external_interrupt(u32 info) | 447 | static int is_external_interrupt(u32 info) |
@@ -347,7 +479,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
347 | svm->next_rip = svm->vmcb->control.next_rip; | 479 | svm->next_rip = svm->vmcb->control.next_rip; |
348 | 480 | ||
349 | if (!svm->next_rip) { | 481 | if (!svm->next_rip) { |
350 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 482 | if (emulate_instruction(vcpu, EMULTYPE_SKIP) != |
351 | EMULATE_DONE) | 483 | EMULATE_DONE) |
352 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 484 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
353 | return; | 485 | return; |
@@ -374,7 +506,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
374 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | 506 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) |
375 | return; | 507 | return; |
376 | 508 | ||
377 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | 509 | if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { |
378 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | 510 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); |
379 | 511 | ||
380 | /* | 512 | /* |
@@ -670,7 +802,7 @@ static __init int svm_hardware_setup(void) | |||
670 | 802 | ||
671 | svm_features = cpuid_edx(SVM_CPUID_FUNC); | 803 | svm_features = cpuid_edx(SVM_CPUID_FUNC); |
672 | 804 | ||
673 | if (!svm_has(SVM_FEATURE_NPT)) | 805 | if (!boot_cpu_has(X86_FEATURE_NPT)) |
674 | npt_enabled = false; | 806 | npt_enabled = false; |
675 | 807 | ||
676 | if (npt_enabled && !npt) { | 808 | if (npt_enabled && !npt) { |
@@ -725,13 +857,15 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
725 | struct vcpu_svm *svm = to_svm(vcpu); | 857 | struct vcpu_svm *svm = to_svm(vcpu); |
726 | u64 g_tsc_offset = 0; | 858 | u64 g_tsc_offset = 0; |
727 | 859 | ||
728 | if (is_nested(svm)) { | 860 | if (is_guest_mode(vcpu)) { |
729 | g_tsc_offset = svm->vmcb->control.tsc_offset - | 861 | g_tsc_offset = svm->vmcb->control.tsc_offset - |
730 | svm->nested.hsave->control.tsc_offset; | 862 | svm->nested.hsave->control.tsc_offset; |
731 | svm->nested.hsave->control.tsc_offset = offset; | 863 | svm->nested.hsave->control.tsc_offset = offset; |
732 | } | 864 | } |
733 | 865 | ||
734 | svm->vmcb->control.tsc_offset = offset + g_tsc_offset; | 866 | svm->vmcb->control.tsc_offset = offset + g_tsc_offset; |
867 | |||
868 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
735 | } | 869 | } |
736 | 870 | ||
737 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 871 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) |
@@ -739,8 +873,9 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | |||
739 | struct vcpu_svm *svm = to_svm(vcpu); | 873 | struct vcpu_svm *svm = to_svm(vcpu); |
740 | 874 | ||
741 | svm->vmcb->control.tsc_offset += adjustment; | 875 | svm->vmcb->control.tsc_offset += adjustment; |
742 | if (is_nested(svm)) | 876 | if (is_guest_mode(vcpu)) |
743 | svm->nested.hsave->control.tsc_offset += adjustment; | 877 | svm->nested.hsave->control.tsc_offset += adjustment; |
878 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
744 | } | 879 | } |
745 | 880 | ||
746 | static void init_vmcb(struct vcpu_svm *svm) | 881 | static void init_vmcb(struct vcpu_svm *svm) |
@@ -749,62 +884,62 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
749 | struct vmcb_save_area *save = &svm->vmcb->save; | 884 | struct vmcb_save_area *save = &svm->vmcb->save; |
750 | 885 | ||
751 | svm->vcpu.fpu_active = 1; | 886 | svm->vcpu.fpu_active = 1; |
887 | svm->vcpu.arch.hflags = 0; | ||
752 | 888 | ||
753 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 889 | set_cr_intercept(svm, INTERCEPT_CR0_READ); |
754 | INTERCEPT_CR3_MASK | | 890 | set_cr_intercept(svm, INTERCEPT_CR3_READ); |
755 | INTERCEPT_CR4_MASK; | 891 | set_cr_intercept(svm, INTERCEPT_CR4_READ); |
756 | 892 | set_cr_intercept(svm, INTERCEPT_CR0_WRITE); | |
757 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 893 | set_cr_intercept(svm, INTERCEPT_CR3_WRITE); |
758 | INTERCEPT_CR3_MASK | | 894 | set_cr_intercept(svm, INTERCEPT_CR4_WRITE); |
759 | INTERCEPT_CR4_MASK | | 895 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
760 | INTERCEPT_CR8_MASK; | 896 | |
761 | 897 | set_dr_intercept(svm, INTERCEPT_DR0_READ); | |
762 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 898 | set_dr_intercept(svm, INTERCEPT_DR1_READ); |
763 | INTERCEPT_DR1_MASK | | 899 | set_dr_intercept(svm, INTERCEPT_DR2_READ); |
764 | INTERCEPT_DR2_MASK | | 900 | set_dr_intercept(svm, INTERCEPT_DR3_READ); |
765 | INTERCEPT_DR3_MASK | | 901 | set_dr_intercept(svm, INTERCEPT_DR4_READ); |
766 | INTERCEPT_DR4_MASK | | 902 | set_dr_intercept(svm, INTERCEPT_DR5_READ); |
767 | INTERCEPT_DR5_MASK | | 903 | set_dr_intercept(svm, INTERCEPT_DR6_READ); |
768 | INTERCEPT_DR6_MASK | | 904 | set_dr_intercept(svm, INTERCEPT_DR7_READ); |
769 | INTERCEPT_DR7_MASK; | 905 | |
770 | 906 | set_dr_intercept(svm, INTERCEPT_DR0_WRITE); | |
771 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 907 | set_dr_intercept(svm, INTERCEPT_DR1_WRITE); |
772 | INTERCEPT_DR1_MASK | | 908 | set_dr_intercept(svm, INTERCEPT_DR2_WRITE); |
773 | INTERCEPT_DR2_MASK | | 909 | set_dr_intercept(svm, INTERCEPT_DR3_WRITE); |
774 | INTERCEPT_DR3_MASK | | 910 | set_dr_intercept(svm, INTERCEPT_DR4_WRITE); |
775 | INTERCEPT_DR4_MASK | | 911 | set_dr_intercept(svm, INTERCEPT_DR5_WRITE); |
776 | INTERCEPT_DR5_MASK | | 912 | set_dr_intercept(svm, INTERCEPT_DR6_WRITE); |
777 | INTERCEPT_DR6_MASK | | 913 | set_dr_intercept(svm, INTERCEPT_DR7_WRITE); |
778 | INTERCEPT_DR7_MASK; | 914 | |
779 | 915 | set_exception_intercept(svm, PF_VECTOR); | |
780 | control->intercept_exceptions = (1 << PF_VECTOR) | | 916 | set_exception_intercept(svm, UD_VECTOR); |
781 | (1 << UD_VECTOR) | | 917 | set_exception_intercept(svm, MC_VECTOR); |
782 | (1 << MC_VECTOR); | 918 | |
783 | 919 | set_intercept(svm, INTERCEPT_INTR); | |
784 | 920 | set_intercept(svm, INTERCEPT_NMI); | |
785 | control->intercept = (1ULL << INTERCEPT_INTR) | | 921 | set_intercept(svm, INTERCEPT_SMI); |
786 | (1ULL << INTERCEPT_NMI) | | 922 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); |
787 | (1ULL << INTERCEPT_SMI) | | 923 | set_intercept(svm, INTERCEPT_CPUID); |
788 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 924 | set_intercept(svm, INTERCEPT_INVD); |
789 | (1ULL << INTERCEPT_CPUID) | | 925 | set_intercept(svm, INTERCEPT_HLT); |
790 | (1ULL << INTERCEPT_INVD) | | 926 | set_intercept(svm, INTERCEPT_INVLPG); |
791 | (1ULL << INTERCEPT_HLT) | | 927 | set_intercept(svm, INTERCEPT_INVLPGA); |
792 | (1ULL << INTERCEPT_INVLPG) | | 928 | set_intercept(svm, INTERCEPT_IOIO_PROT); |
793 | (1ULL << INTERCEPT_INVLPGA) | | 929 | set_intercept(svm, INTERCEPT_MSR_PROT); |
794 | (1ULL << INTERCEPT_IOIO_PROT) | | 930 | set_intercept(svm, INTERCEPT_TASK_SWITCH); |
795 | (1ULL << INTERCEPT_MSR_PROT) | | 931 | set_intercept(svm, INTERCEPT_SHUTDOWN); |
796 | (1ULL << INTERCEPT_TASK_SWITCH) | | 932 | set_intercept(svm, INTERCEPT_VMRUN); |
797 | (1ULL << INTERCEPT_SHUTDOWN) | | 933 | set_intercept(svm, INTERCEPT_VMMCALL); |
798 | (1ULL << INTERCEPT_VMRUN) | | 934 | set_intercept(svm, INTERCEPT_VMLOAD); |
799 | (1ULL << INTERCEPT_VMMCALL) | | 935 | set_intercept(svm, INTERCEPT_VMSAVE); |
800 | (1ULL << INTERCEPT_VMLOAD) | | 936 | set_intercept(svm, INTERCEPT_STGI); |
801 | (1ULL << INTERCEPT_VMSAVE) | | 937 | set_intercept(svm, INTERCEPT_CLGI); |
802 | (1ULL << INTERCEPT_STGI) | | 938 | set_intercept(svm, INTERCEPT_SKINIT); |
803 | (1ULL << INTERCEPT_CLGI) | | 939 | set_intercept(svm, INTERCEPT_WBINVD); |
804 | (1ULL << INTERCEPT_SKINIT) | | 940 | set_intercept(svm, INTERCEPT_MONITOR); |
805 | (1ULL << INTERCEPT_WBINVD) | | 941 | set_intercept(svm, INTERCEPT_MWAIT); |
806 | (1ULL << INTERCEPT_MONITOR) | | 942 | set_intercept(svm, INTERCEPT_XSETBV); |
807 | (1ULL << INTERCEPT_MWAIT); | ||
808 | 943 | ||
809 | control->iopm_base_pa = iopm_base; | 944 | control->iopm_base_pa = iopm_base; |
810 | control->msrpm_base_pa = __pa(svm->msrpm); | 945 | control->msrpm_base_pa = __pa(svm->msrpm); |
@@ -855,25 +990,27 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
855 | if (npt_enabled) { | 990 | if (npt_enabled) { |
856 | /* Setup VMCB for Nested Paging */ | 991 | /* Setup VMCB for Nested Paging */ |
857 | control->nested_ctl = 1; | 992 | control->nested_ctl = 1; |
858 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | | 993 | clr_intercept(svm, INTERCEPT_TASK_SWITCH); |
859 | (1ULL << INTERCEPT_INVLPG)); | 994 | clr_intercept(svm, INTERCEPT_INVLPG); |
860 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 995 | clr_exception_intercept(svm, PF_VECTOR); |
861 | control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; | 996 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); |
862 | control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; | 997 | clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); |
863 | save->g_pat = 0x0007040600070406ULL; | 998 | save->g_pat = 0x0007040600070406ULL; |
864 | save->cr3 = 0; | 999 | save->cr3 = 0; |
865 | save->cr4 = 0; | 1000 | save->cr4 = 0; |
866 | } | 1001 | } |
867 | force_new_asid(&svm->vcpu); | 1002 | svm->asid_generation = 0; |
868 | 1003 | ||
869 | svm->nested.vmcb = 0; | 1004 | svm->nested.vmcb = 0; |
870 | svm->vcpu.arch.hflags = 0; | 1005 | svm->vcpu.arch.hflags = 0; |
871 | 1006 | ||
872 | if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { | 1007 | if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { |
873 | control->pause_filter_count = 3000; | 1008 | control->pause_filter_count = 3000; |
874 | control->intercept |= (1ULL << INTERCEPT_PAUSE); | 1009 | set_intercept(svm, INTERCEPT_PAUSE); |
875 | } | 1010 | } |
876 | 1011 | ||
1012 | mark_all_dirty(svm->vmcb); | ||
1013 | |||
877 | enable_gif(svm); | 1014 | enable_gif(svm); |
878 | } | 1015 | } |
879 | 1016 | ||
@@ -990,8 +1127,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
990 | 1127 | ||
991 | if (unlikely(cpu != vcpu->cpu)) { | 1128 | if (unlikely(cpu != vcpu->cpu)) { |
992 | svm->asid_generation = 0; | 1129 | svm->asid_generation = 0; |
1130 | mark_all_dirty(svm->vmcb); | ||
993 | } | 1131 | } |
994 | 1132 | ||
1133 | #ifdef CONFIG_X86_64 | ||
1134 | rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); | ||
1135 | #endif | ||
1136 | savesegment(fs, svm->host.fs); | ||
1137 | savesegment(gs, svm->host.gs); | ||
1138 | svm->host.ldt = kvm_read_ldt(); | ||
1139 | |||
995 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1140 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
996 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1141 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
997 | } | 1142 | } |
@@ -1002,6 +1147,14 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1002 | int i; | 1147 | int i; |
1003 | 1148 | ||
1004 | ++vcpu->stat.host_state_reload; | 1149 | ++vcpu->stat.host_state_reload; |
1150 | kvm_load_ldt(svm->host.ldt); | ||
1151 | #ifdef CONFIG_X86_64 | ||
1152 | loadsegment(fs, svm->host.fs); | ||
1153 | load_gs_index(svm->host.gs); | ||
1154 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
1155 | #else | ||
1156 | loadsegment(gs, svm->host.gs); | ||
1157 | #endif | ||
1005 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1158 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1006 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1159 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1007 | } | 1160 | } |
@@ -1021,7 +1174,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1021 | switch (reg) { | 1174 | switch (reg) { |
1022 | case VCPU_EXREG_PDPTR: | 1175 | case VCPU_EXREG_PDPTR: |
1023 | BUG_ON(!npt_enabled); | 1176 | BUG_ON(!npt_enabled); |
1024 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); | 1177 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
1025 | break; | 1178 | break; |
1026 | default: | 1179 | default: |
1027 | BUG(); | 1180 | BUG(); |
@@ -1030,12 +1183,12 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1030 | 1183 | ||
1031 | static void svm_set_vintr(struct vcpu_svm *svm) | 1184 | static void svm_set_vintr(struct vcpu_svm *svm) |
1032 | { | 1185 | { |
1033 | svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR; | 1186 | set_intercept(svm, INTERCEPT_VINTR); |
1034 | } | 1187 | } |
1035 | 1188 | ||
1036 | static void svm_clear_vintr(struct vcpu_svm *svm) | 1189 | static void svm_clear_vintr(struct vcpu_svm *svm) |
1037 | { | 1190 | { |
1038 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); | 1191 | clr_intercept(svm, INTERCEPT_VINTR); |
1039 | } | 1192 | } |
1040 | 1193 | ||
1041 | static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) | 1194 | static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) |
@@ -1150,6 +1303,7 @@ static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) | |||
1150 | 1303 | ||
1151 | svm->vmcb->save.idtr.limit = dt->size; | 1304 | svm->vmcb->save.idtr.limit = dt->size; |
1152 | svm->vmcb->save.idtr.base = dt->address ; | 1305 | svm->vmcb->save.idtr.base = dt->address ; |
1306 | mark_dirty(svm->vmcb, VMCB_DT); | ||
1153 | } | 1307 | } |
1154 | 1308 | ||
1155 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) | 1309 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
@@ -1166,19 +1320,23 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) | |||
1166 | 1320 | ||
1167 | svm->vmcb->save.gdtr.limit = dt->size; | 1321 | svm->vmcb->save.gdtr.limit = dt->size; |
1168 | svm->vmcb->save.gdtr.base = dt->address ; | 1322 | svm->vmcb->save.gdtr.base = dt->address ; |
1323 | mark_dirty(svm->vmcb, VMCB_DT); | ||
1169 | } | 1324 | } |
1170 | 1325 | ||
1171 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1326 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
1172 | { | 1327 | { |
1173 | } | 1328 | } |
1174 | 1329 | ||
1330 | static void svm_decache_cr3(struct kvm_vcpu *vcpu) | ||
1331 | { | ||
1332 | } | ||
1333 | |||
1175 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1334 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1176 | { | 1335 | { |
1177 | } | 1336 | } |
1178 | 1337 | ||
1179 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1338 | static void update_cr0_intercept(struct vcpu_svm *svm) |
1180 | { | 1339 | { |
1181 | struct vmcb *vmcb = svm->vmcb; | ||
1182 | ulong gcr0 = svm->vcpu.arch.cr0; | 1340 | ulong gcr0 = svm->vcpu.arch.cr0; |
1183 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1341 | u64 *hcr0 = &svm->vmcb->save.cr0; |
1184 | 1342 | ||
@@ -1188,27 +1346,14 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
1188 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | 1346 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) |
1189 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | 1347 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); |
1190 | 1348 | ||
1349 | mark_dirty(svm->vmcb, VMCB_CR); | ||
1191 | 1350 | ||
1192 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1351 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
1193 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1352 | clr_cr_intercept(svm, INTERCEPT_CR0_READ); |
1194 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1353 | clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
1195 | if (is_nested(svm)) { | ||
1196 | struct vmcb *hsave = svm->nested.hsave; | ||
1197 | |||
1198 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
1199 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
1200 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
1201 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
1202 | } | ||
1203 | } else { | 1354 | } else { |
1204 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1355 | set_cr_intercept(svm, INTERCEPT_CR0_READ); |
1205 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1356 | set_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
1206 | if (is_nested(svm)) { | ||
1207 | struct vmcb *hsave = svm->nested.hsave; | ||
1208 | |||
1209 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
1210 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
1211 | } | ||
1212 | } | 1357 | } |
1213 | } | 1358 | } |
1214 | 1359 | ||
@@ -1216,7 +1361,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1216 | { | 1361 | { |
1217 | struct vcpu_svm *svm = to_svm(vcpu); | 1362 | struct vcpu_svm *svm = to_svm(vcpu); |
1218 | 1363 | ||
1219 | if (is_nested(svm)) { | 1364 | if (is_guest_mode(vcpu)) { |
1220 | /* | 1365 | /* |
1221 | * We are here because we run in nested mode, the host kvm | 1366 | * We are here because we run in nested mode, the host kvm |
1222 | * intercepts cr0 writes but the l1 hypervisor does not. | 1367 | * intercepts cr0 writes but the l1 hypervisor does not. |
@@ -1268,6 +1413,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1268 | */ | 1413 | */ |
1269 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 1414 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
1270 | svm->vmcb->save.cr0 = cr0; | 1415 | svm->vmcb->save.cr0 = cr0; |
1416 | mark_dirty(svm->vmcb, VMCB_CR); | ||
1271 | update_cr0_intercept(svm); | 1417 | update_cr0_intercept(svm); |
1272 | } | 1418 | } |
1273 | 1419 | ||
@@ -1277,13 +1423,14 @@ static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1277 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; | 1423 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; |
1278 | 1424 | ||
1279 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) | 1425 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) |
1280 | force_new_asid(vcpu); | 1426 | svm_flush_tlb(vcpu); |
1281 | 1427 | ||
1282 | vcpu->arch.cr4 = cr4; | 1428 | vcpu->arch.cr4 = cr4; |
1283 | if (!npt_enabled) | 1429 | if (!npt_enabled) |
1284 | cr4 |= X86_CR4_PAE; | 1430 | cr4 |= X86_CR4_PAE; |
1285 | cr4 |= host_cr4_mce; | 1431 | cr4 |= host_cr4_mce; |
1286 | to_svm(vcpu)->vmcb->save.cr4 = cr4; | 1432 | to_svm(vcpu)->vmcb->save.cr4 = cr4; |
1433 | mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); | ||
1287 | } | 1434 | } |
1288 | 1435 | ||
1289 | static void svm_set_segment(struct kvm_vcpu *vcpu, | 1436 | static void svm_set_segment(struct kvm_vcpu *vcpu, |
@@ -1312,26 +1459,25 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
1312 | = (svm->vmcb->save.cs.attrib | 1459 | = (svm->vmcb->save.cs.attrib |
1313 | >> SVM_SELECTOR_DPL_SHIFT) & 3; | 1460 | >> SVM_SELECTOR_DPL_SHIFT) & 3; |
1314 | 1461 | ||
1462 | mark_dirty(svm->vmcb, VMCB_SEG); | ||
1315 | } | 1463 | } |
1316 | 1464 | ||
1317 | static void update_db_intercept(struct kvm_vcpu *vcpu) | 1465 | static void update_db_intercept(struct kvm_vcpu *vcpu) |
1318 | { | 1466 | { |
1319 | struct vcpu_svm *svm = to_svm(vcpu); | 1467 | struct vcpu_svm *svm = to_svm(vcpu); |
1320 | 1468 | ||
1321 | svm->vmcb->control.intercept_exceptions &= | 1469 | clr_exception_intercept(svm, DB_VECTOR); |
1322 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 1470 | clr_exception_intercept(svm, BP_VECTOR); |
1323 | 1471 | ||
1324 | if (svm->nmi_singlestep) | 1472 | if (svm->nmi_singlestep) |
1325 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | 1473 | set_exception_intercept(svm, DB_VECTOR); |
1326 | 1474 | ||
1327 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1475 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
1328 | if (vcpu->guest_debug & | 1476 | if (vcpu->guest_debug & |
1329 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | 1477 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) |
1330 | svm->vmcb->control.intercept_exceptions |= | 1478 | set_exception_intercept(svm, DB_VECTOR); |
1331 | 1 << DB_VECTOR; | ||
1332 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 1479 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
1333 | svm->vmcb->control.intercept_exceptions |= | 1480 | set_exception_intercept(svm, BP_VECTOR); |
1334 | 1 << BP_VECTOR; | ||
1335 | } else | 1481 | } else |
1336 | vcpu->guest_debug = 0; | 1482 | vcpu->guest_debug = 0; |
1337 | } | 1483 | } |
@@ -1345,21 +1491,9 @@ static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
1345 | else | 1491 | else |
1346 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1492 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1347 | 1493 | ||
1348 | update_db_intercept(vcpu); | 1494 | mark_dirty(svm->vmcb, VMCB_DR); |
1349 | } | ||
1350 | |||
1351 | static void load_host_msrs(struct kvm_vcpu *vcpu) | ||
1352 | { | ||
1353 | #ifdef CONFIG_X86_64 | ||
1354 | wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); | ||
1355 | #endif | ||
1356 | } | ||
1357 | 1495 | ||
1358 | static void save_host_msrs(struct kvm_vcpu *vcpu) | 1496 | update_db_intercept(vcpu); |
1359 | { | ||
1360 | #ifdef CONFIG_X86_64 | ||
1361 | rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); | ||
1362 | #endif | ||
1363 | } | 1497 | } |
1364 | 1498 | ||
1365 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | 1499 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) |
@@ -1372,6 +1506,8 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1372 | 1506 | ||
1373 | svm->asid_generation = sd->asid_generation; | 1507 | svm->asid_generation = sd->asid_generation; |
1374 | svm->vmcb->control.asid = sd->next_asid++; | 1508 | svm->vmcb->control.asid = sd->next_asid++; |
1509 | |||
1510 | mark_dirty(svm->vmcb, VMCB_ASID); | ||
1375 | } | 1511 | } |
1376 | 1512 | ||
1377 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | 1513 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
@@ -1379,20 +1515,40 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | |||
1379 | struct vcpu_svm *svm = to_svm(vcpu); | 1515 | struct vcpu_svm *svm = to_svm(vcpu); |
1380 | 1516 | ||
1381 | svm->vmcb->save.dr7 = value; | 1517 | svm->vmcb->save.dr7 = value; |
1518 | mark_dirty(svm->vmcb, VMCB_DR); | ||
1382 | } | 1519 | } |
1383 | 1520 | ||
1384 | static int pf_interception(struct vcpu_svm *svm) | 1521 | static int pf_interception(struct vcpu_svm *svm) |
1385 | { | 1522 | { |
1386 | u64 fault_address; | 1523 | u64 fault_address = svm->vmcb->control.exit_info_2; |
1387 | u32 error_code; | 1524 | u32 error_code; |
1525 | int r = 1; | ||
1388 | 1526 | ||
1389 | fault_address = svm->vmcb->control.exit_info_2; | 1527 | switch (svm->apf_reason) { |
1390 | error_code = svm->vmcb->control.exit_info_1; | 1528 | default: |
1391 | 1529 | error_code = svm->vmcb->control.exit_info_1; | |
1392 | trace_kvm_page_fault(fault_address, error_code); | 1530 | |
1393 | if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) | 1531 | trace_kvm_page_fault(fault_address, error_code); |
1394 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | 1532 | if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) |
1395 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1533 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); |
1534 | r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, | ||
1535 | svm->vmcb->control.insn_bytes, | ||
1536 | svm->vmcb->control.insn_len); | ||
1537 | break; | ||
1538 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | ||
1539 | svm->apf_reason = 0; | ||
1540 | local_irq_disable(); | ||
1541 | kvm_async_pf_task_wait(fault_address); | ||
1542 | local_irq_enable(); | ||
1543 | break; | ||
1544 | case KVM_PV_REASON_PAGE_READY: | ||
1545 | svm->apf_reason = 0; | ||
1546 | local_irq_disable(); | ||
1547 | kvm_async_pf_task_wake(fault_address); | ||
1548 | local_irq_enable(); | ||
1549 | break; | ||
1550 | } | ||
1551 | return r; | ||
1396 | } | 1552 | } |
1397 | 1553 | ||
1398 | static int db_interception(struct vcpu_svm *svm) | 1554 | static int db_interception(struct vcpu_svm *svm) |
@@ -1440,7 +1596,7 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1440 | { | 1596 | { |
1441 | int er; | 1597 | int er; |
1442 | 1598 | ||
1443 | er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); | 1599 | er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); |
1444 | if (er != EMULATE_DONE) | 1600 | if (er != EMULATE_DONE) |
1445 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1601 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1446 | return 1; | 1602 | return 1; |
@@ -1449,21 +1605,8 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1449 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1605 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1450 | { | 1606 | { |
1451 | struct vcpu_svm *svm = to_svm(vcpu); | 1607 | struct vcpu_svm *svm = to_svm(vcpu); |
1452 | u32 excp; | ||
1453 | |||
1454 | if (is_nested(svm)) { | ||
1455 | u32 h_excp, n_excp; | ||
1456 | |||
1457 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
1458 | n_excp = svm->nested.intercept_exceptions; | ||
1459 | h_excp &= ~(1 << NM_VECTOR); | ||
1460 | excp = h_excp | n_excp; | ||
1461 | } else { | ||
1462 | excp = svm->vmcb->control.intercept_exceptions; | ||
1463 | excp &= ~(1 << NM_VECTOR); | ||
1464 | } | ||
1465 | 1608 | ||
1466 | svm->vmcb->control.intercept_exceptions = excp; | 1609 | clr_exception_intercept(svm, NM_VECTOR); |
1467 | 1610 | ||
1468 | svm->vcpu.fpu_active = 1; | 1611 | svm->vcpu.fpu_active = 1; |
1469 | update_cr0_intercept(svm); | 1612 | update_cr0_intercept(svm); |
@@ -1570,7 +1713,7 @@ static int io_interception(struct vcpu_svm *svm) | |||
1570 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1713 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1571 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1714 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1572 | if (string || in) | 1715 | if (string || in) |
1573 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 1716 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
1574 | 1717 | ||
1575 | port = io_info >> 16; | 1718 | port = io_info >> 16; |
1576 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1719 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
@@ -1624,17 +1767,19 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, | |||
1624 | struct vcpu_svm *svm = to_svm(vcpu); | 1767 | struct vcpu_svm *svm = to_svm(vcpu); |
1625 | 1768 | ||
1626 | svm->vmcb->control.nested_cr3 = root; | 1769 | svm->vmcb->control.nested_cr3 = root; |
1627 | force_new_asid(vcpu); | 1770 | mark_dirty(svm->vmcb, VMCB_NPT); |
1771 | svm_flush_tlb(vcpu); | ||
1628 | } | 1772 | } |
1629 | 1773 | ||
1630 | static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu) | 1774 | static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, |
1775 | struct x86_exception *fault) | ||
1631 | { | 1776 | { |
1632 | struct vcpu_svm *svm = to_svm(vcpu); | 1777 | struct vcpu_svm *svm = to_svm(vcpu); |
1633 | 1778 | ||
1634 | svm->vmcb->control.exit_code = SVM_EXIT_NPF; | 1779 | svm->vmcb->control.exit_code = SVM_EXIT_NPF; |
1635 | svm->vmcb->control.exit_code_hi = 0; | 1780 | svm->vmcb->control.exit_code_hi = 0; |
1636 | svm->vmcb->control.exit_info_1 = vcpu->arch.fault.error_code; | 1781 | svm->vmcb->control.exit_info_1 = fault->error_code; |
1637 | svm->vmcb->control.exit_info_2 = vcpu->arch.fault.address; | 1782 | svm->vmcb->control.exit_info_2 = fault->address; |
1638 | 1783 | ||
1639 | nested_svm_vmexit(svm); | 1784 | nested_svm_vmexit(svm); |
1640 | } | 1785 | } |
@@ -1680,7 +1825,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1680 | { | 1825 | { |
1681 | int vmexit; | 1826 | int vmexit; |
1682 | 1827 | ||
1683 | if (!is_nested(svm)) | 1828 | if (!is_guest_mode(&svm->vcpu)) |
1684 | return 0; | 1829 | return 0; |
1685 | 1830 | ||
1686 | svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; | 1831 | svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; |
@@ -1698,7 +1843,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1698 | /* This function returns true if it is save to enable the irq window */ | 1843 | /* This function returns true if it is save to enable the irq window */ |
1699 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | 1844 | static inline bool nested_svm_intr(struct vcpu_svm *svm) |
1700 | { | 1845 | { |
1701 | if (!is_nested(svm)) | 1846 | if (!is_guest_mode(&svm->vcpu)) |
1702 | return true; | 1847 | return true; |
1703 | 1848 | ||
1704 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1849 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
@@ -1737,7 +1882,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) | |||
1737 | /* This function returns true if it is save to enable the nmi window */ | 1882 | /* This function returns true if it is save to enable the nmi window */ |
1738 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | 1883 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) |
1739 | { | 1884 | { |
1740 | if (!is_nested(svm)) | 1885 | if (!is_guest_mode(&svm->vcpu)) |
1741 | return true; | 1886 | return true; |
1742 | 1887 | ||
1743 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | 1888 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) |
@@ -1836,8 +1981,8 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1836 | return NESTED_EXIT_HOST; | 1981 | return NESTED_EXIT_HOST; |
1837 | break; | 1982 | break; |
1838 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1983 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
1839 | /* When we're shadowing, trap PFs */ | 1984 | /* When we're shadowing, trap PFs, but not async PF */ |
1840 | if (!npt_enabled) | 1985 | if (!npt_enabled && svm->apf_reason == 0) |
1841 | return NESTED_EXIT_HOST; | 1986 | return NESTED_EXIT_HOST; |
1842 | break; | 1987 | break; |
1843 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | 1988 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: |
@@ -1865,27 +2010,15 @@ static int nested_svm_intercept(struct vcpu_svm *svm) | |||
1865 | case SVM_EXIT_IOIO: | 2010 | case SVM_EXIT_IOIO: |
1866 | vmexit = nested_svm_intercept_ioio(svm); | 2011 | vmexit = nested_svm_intercept_ioio(svm); |
1867 | break; | 2012 | break; |
1868 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 2013 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { |
1869 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 2014 | u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); |
1870 | if (svm->nested.intercept_cr_read & cr_bits) | 2015 | if (svm->nested.intercept_cr & bit) |
1871 | vmexit = NESTED_EXIT_DONE; | 2016 | vmexit = NESTED_EXIT_DONE; |
1872 | break; | 2017 | break; |
1873 | } | 2018 | } |
1874 | case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { | 2019 | case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { |
1875 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); | 2020 | u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); |
1876 | if (svm->nested.intercept_cr_write & cr_bits) | 2021 | if (svm->nested.intercept_dr & bit) |
1877 | vmexit = NESTED_EXIT_DONE; | ||
1878 | break; | ||
1879 | } | ||
1880 | case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { | ||
1881 | u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); | ||
1882 | if (svm->nested.intercept_dr_read & dr_bits) | ||
1883 | vmexit = NESTED_EXIT_DONE; | ||
1884 | break; | ||
1885 | } | ||
1886 | case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { | ||
1887 | u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); | ||
1888 | if (svm->nested.intercept_dr_write & dr_bits) | ||
1889 | vmexit = NESTED_EXIT_DONE; | 2022 | vmexit = NESTED_EXIT_DONE; |
1890 | break; | 2023 | break; |
1891 | } | 2024 | } |
@@ -1893,6 +2026,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm) | |||
1893 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); | 2026 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); |
1894 | if (svm->nested.intercept_exceptions & excp_bits) | 2027 | if (svm->nested.intercept_exceptions & excp_bits) |
1895 | vmexit = NESTED_EXIT_DONE; | 2028 | vmexit = NESTED_EXIT_DONE; |
2029 | /* async page fault always cause vmexit */ | ||
2030 | else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && | ||
2031 | svm->apf_reason != 0) | ||
2032 | vmexit = NESTED_EXIT_DONE; | ||
1896 | break; | 2033 | break; |
1897 | } | 2034 | } |
1898 | case SVM_EXIT_ERR: { | 2035 | case SVM_EXIT_ERR: { |
@@ -1926,10 +2063,8 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr | |||
1926 | struct vmcb_control_area *dst = &dst_vmcb->control; | 2063 | struct vmcb_control_area *dst = &dst_vmcb->control; |
1927 | struct vmcb_control_area *from = &from_vmcb->control; | 2064 | struct vmcb_control_area *from = &from_vmcb->control; |
1928 | 2065 | ||
1929 | dst->intercept_cr_read = from->intercept_cr_read; | 2066 | dst->intercept_cr = from->intercept_cr; |
1930 | dst->intercept_cr_write = from->intercept_cr_write; | 2067 | dst->intercept_dr = from->intercept_dr; |
1931 | dst->intercept_dr_read = from->intercept_dr_read; | ||
1932 | dst->intercept_dr_write = from->intercept_dr_write; | ||
1933 | dst->intercept_exceptions = from->intercept_exceptions; | 2068 | dst->intercept_exceptions = from->intercept_exceptions; |
1934 | dst->intercept = from->intercept; | 2069 | dst->intercept = from->intercept; |
1935 | dst->iopm_base_pa = from->iopm_base_pa; | 2070 | dst->iopm_base_pa = from->iopm_base_pa; |
@@ -1970,7 +2105,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1970 | if (!nested_vmcb) | 2105 | if (!nested_vmcb) |
1971 | return 1; | 2106 | return 1; |
1972 | 2107 | ||
1973 | /* Exit nested SVM mode */ | 2108 | /* Exit Guest-Mode */ |
2109 | leave_guest_mode(&svm->vcpu); | ||
1974 | svm->nested.vmcb = 0; | 2110 | svm->nested.vmcb = 0; |
1975 | 2111 | ||
1976 | /* Give the current vmcb to the guest */ | 2112 | /* Give the current vmcb to the guest */ |
@@ -1984,7 +2120,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1984 | nested_vmcb->save.idtr = vmcb->save.idtr; | 2120 | nested_vmcb->save.idtr = vmcb->save.idtr; |
1985 | nested_vmcb->save.efer = svm->vcpu.arch.efer; | 2121 | nested_vmcb->save.efer = svm->vcpu.arch.efer; |
1986 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); | 2122 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1987 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; | 2123 | nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); |
1988 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 2124 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
1989 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | 2125 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; |
1990 | nested_vmcb->save.rflags = vmcb->save.rflags; | 2126 | nested_vmcb->save.rflags = vmcb->save.rflags; |
@@ -2061,6 +2197,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2061 | svm->vmcb->save.cpl = 0; | 2197 | svm->vmcb->save.cpl = 0; |
2062 | svm->vmcb->control.exit_int_info = 0; | 2198 | svm->vmcb->control.exit_int_info = 0; |
2063 | 2199 | ||
2200 | mark_all_dirty(svm->vmcb); | ||
2201 | |||
2064 | nested_svm_unmap(page); | 2202 | nested_svm_unmap(page); |
2065 | 2203 | ||
2066 | nested_svm_uninit_mmu_context(&svm->vcpu); | 2204 | nested_svm_uninit_mmu_context(&svm->vcpu); |
@@ -2148,8 +2286,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2148 | nested_vmcb->control.event_inj, | 2286 | nested_vmcb->control.event_inj, |
2149 | nested_vmcb->control.nested_ctl); | 2287 | nested_vmcb->control.nested_ctl); |
2150 | 2288 | ||
2151 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | 2289 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, |
2152 | nested_vmcb->control.intercept_cr_write, | 2290 | nested_vmcb->control.intercept_cr >> 16, |
2153 | nested_vmcb->control.intercept_exceptions, | 2291 | nested_vmcb->control.intercept_exceptions, |
2154 | nested_vmcb->control.intercept); | 2292 | nested_vmcb->control.intercept); |
2155 | 2293 | ||
@@ -2177,7 +2315,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2177 | if (npt_enabled) | 2315 | if (npt_enabled) |
2178 | hsave->save.cr3 = vmcb->save.cr3; | 2316 | hsave->save.cr3 = vmcb->save.cr3; |
2179 | else | 2317 | else |
2180 | hsave->save.cr3 = svm->vcpu.arch.cr3; | 2318 | hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); |
2181 | 2319 | ||
2182 | copy_vmcb_control_area(hsave, vmcb); | 2320 | copy_vmcb_control_area(hsave, vmcb); |
2183 | 2321 | ||
@@ -2229,14 +2367,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2229 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; | 2367 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
2230 | 2368 | ||
2231 | /* cache intercepts */ | 2369 | /* cache intercepts */ |
2232 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2370 | svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; |
2233 | svm->nested.intercept_cr_write = nested_vmcb->control.intercept_cr_write; | 2371 | svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; |
2234 | svm->nested.intercept_dr_read = nested_vmcb->control.intercept_dr_read; | ||
2235 | svm->nested.intercept_dr_write = nested_vmcb->control.intercept_dr_write; | ||
2236 | svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; | 2372 | svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; |
2237 | svm->nested.intercept = nested_vmcb->control.intercept; | 2373 | svm->nested.intercept = nested_vmcb->control.intercept; |
2238 | 2374 | ||
2239 | force_new_asid(&svm->vcpu); | 2375 | svm_flush_tlb(&svm->vcpu); |
2240 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; | 2376 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; |
2241 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) | 2377 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) |
2242 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; | 2378 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; |
@@ -2245,29 +2381,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2245 | 2381 | ||
2246 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | 2382 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { |
2247 | /* We only want the cr8 intercept bits of the guest */ | 2383 | /* We only want the cr8 intercept bits of the guest */ |
2248 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | 2384 | clr_cr_intercept(svm, INTERCEPT_CR8_READ); |
2249 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2385 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
2250 | } | 2386 | } |
2251 | 2387 | ||
2252 | /* We don't want to see VMMCALLs from a nested guest */ | 2388 | /* We don't want to see VMMCALLs from a nested guest */ |
2253 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | 2389 | clr_intercept(svm, INTERCEPT_VMMCALL); |
2254 | |||
2255 | /* | ||
2256 | * We don't want a nested guest to be more powerful than the guest, so | ||
2257 | * all intercepts are ORed | ||
2258 | */ | ||
2259 | svm->vmcb->control.intercept_cr_read |= | ||
2260 | nested_vmcb->control.intercept_cr_read; | ||
2261 | svm->vmcb->control.intercept_cr_write |= | ||
2262 | nested_vmcb->control.intercept_cr_write; | ||
2263 | svm->vmcb->control.intercept_dr_read |= | ||
2264 | nested_vmcb->control.intercept_dr_read; | ||
2265 | svm->vmcb->control.intercept_dr_write |= | ||
2266 | nested_vmcb->control.intercept_dr_write; | ||
2267 | svm->vmcb->control.intercept_exceptions |= | ||
2268 | nested_vmcb->control.intercept_exceptions; | ||
2269 | |||
2270 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
2271 | 2390 | ||
2272 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | 2391 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; |
2273 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2392 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
@@ -2278,11 +2397,21 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2278 | 2397 | ||
2279 | nested_svm_unmap(page); | 2398 | nested_svm_unmap(page); |
2280 | 2399 | ||
2281 | /* nested_vmcb is our indicator if nested SVM is activated */ | 2400 | /* Enter Guest-Mode */ |
2401 | enter_guest_mode(&svm->vcpu); | ||
2402 | |||
2403 | /* | ||
2404 | * Merge guest and host intercepts - must be called with vcpu in | ||
2405 | * guest-mode to take affect here | ||
2406 | */ | ||
2407 | recalc_intercepts(svm); | ||
2408 | |||
2282 | svm->nested.vmcb = vmcb_gpa; | 2409 | svm->nested.vmcb = vmcb_gpa; |
2283 | 2410 | ||
2284 | enable_gif(svm); | 2411 | enable_gif(svm); |
2285 | 2412 | ||
2413 | mark_all_dirty(svm->vmcb); | ||
2414 | |||
2286 | return true; | 2415 | return true; |
2287 | } | 2416 | } |
2288 | 2417 | ||
@@ -2400,6 +2529,8 @@ static int clgi_interception(struct vcpu_svm *svm) | |||
2400 | svm_clear_vintr(svm); | 2529 | svm_clear_vintr(svm); |
2401 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2530 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
2402 | 2531 | ||
2532 | mark_dirty(svm->vmcb, VMCB_INTR); | ||
2533 | |||
2403 | return 1; | 2534 | return 1; |
2404 | } | 2535 | } |
2405 | 2536 | ||
@@ -2426,6 +2557,19 @@ static int skinit_interception(struct vcpu_svm *svm) | |||
2426 | return 1; | 2557 | return 1; |
2427 | } | 2558 | } |
2428 | 2559 | ||
2560 | static int xsetbv_interception(struct vcpu_svm *svm) | ||
2561 | { | ||
2562 | u64 new_bv = kvm_read_edx_eax(&svm->vcpu); | ||
2563 | u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); | ||
2564 | |||
2565 | if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { | ||
2566 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2567 | skip_emulated_instruction(&svm->vcpu); | ||
2568 | } | ||
2569 | |||
2570 | return 1; | ||
2571 | } | ||
2572 | |||
2429 | static int invalid_op_interception(struct vcpu_svm *svm) | 2573 | static int invalid_op_interception(struct vcpu_svm *svm) |
2430 | { | 2574 | { |
2431 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 2575 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
@@ -2507,19 +2651,92 @@ static int cpuid_interception(struct vcpu_svm *svm) | |||
2507 | static int iret_interception(struct vcpu_svm *svm) | 2651 | static int iret_interception(struct vcpu_svm *svm) |
2508 | { | 2652 | { |
2509 | ++svm->vcpu.stat.nmi_window_exits; | 2653 | ++svm->vcpu.stat.nmi_window_exits; |
2510 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); | 2654 | clr_intercept(svm, INTERCEPT_IRET); |
2511 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2655 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2512 | return 1; | 2656 | return 1; |
2513 | } | 2657 | } |
2514 | 2658 | ||
2515 | static int invlpg_interception(struct vcpu_svm *svm) | 2659 | static int invlpg_interception(struct vcpu_svm *svm) |
2516 | { | 2660 | { |
2517 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; | 2661 | if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) |
2662 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | ||
2663 | |||
2664 | kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); | ||
2665 | skip_emulated_instruction(&svm->vcpu); | ||
2666 | return 1; | ||
2518 | } | 2667 | } |
2519 | 2668 | ||
2520 | static int emulate_on_interception(struct vcpu_svm *svm) | 2669 | static int emulate_on_interception(struct vcpu_svm *svm) |
2521 | { | 2670 | { |
2522 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; | 2671 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
2672 | } | ||
2673 | |||
2674 | #define CR_VALID (1ULL << 63) | ||
2675 | |||
2676 | static int cr_interception(struct vcpu_svm *svm) | ||
2677 | { | ||
2678 | int reg, cr; | ||
2679 | unsigned long val; | ||
2680 | int err; | ||
2681 | |||
2682 | if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) | ||
2683 | return emulate_on_interception(svm); | ||
2684 | |||
2685 | if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) | ||
2686 | return emulate_on_interception(svm); | ||
2687 | |||
2688 | reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; | ||
2689 | cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; | ||
2690 | |||
2691 | err = 0; | ||
2692 | if (cr >= 16) { /* mov to cr */ | ||
2693 | cr -= 16; | ||
2694 | val = kvm_register_read(&svm->vcpu, reg); | ||
2695 | switch (cr) { | ||
2696 | case 0: | ||
2697 | err = kvm_set_cr0(&svm->vcpu, val); | ||
2698 | break; | ||
2699 | case 3: | ||
2700 | err = kvm_set_cr3(&svm->vcpu, val); | ||
2701 | break; | ||
2702 | case 4: | ||
2703 | err = kvm_set_cr4(&svm->vcpu, val); | ||
2704 | break; | ||
2705 | case 8: | ||
2706 | err = kvm_set_cr8(&svm->vcpu, val); | ||
2707 | break; | ||
2708 | default: | ||
2709 | WARN(1, "unhandled write to CR%d", cr); | ||
2710 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
2711 | return 1; | ||
2712 | } | ||
2713 | } else { /* mov from cr */ | ||
2714 | switch (cr) { | ||
2715 | case 0: | ||
2716 | val = kvm_read_cr0(&svm->vcpu); | ||
2717 | break; | ||
2718 | case 2: | ||
2719 | val = svm->vcpu.arch.cr2; | ||
2720 | break; | ||
2721 | case 3: | ||
2722 | val = kvm_read_cr3(&svm->vcpu); | ||
2723 | break; | ||
2724 | case 4: | ||
2725 | val = kvm_read_cr4(&svm->vcpu); | ||
2726 | break; | ||
2727 | case 8: | ||
2728 | val = kvm_get_cr8(&svm->vcpu); | ||
2729 | break; | ||
2730 | default: | ||
2731 | WARN(1, "unhandled read from CR%d", cr); | ||
2732 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
2733 | return 1; | ||
2734 | } | ||
2735 | kvm_register_write(&svm->vcpu, reg, val); | ||
2736 | } | ||
2737 | kvm_complete_insn_gp(&svm->vcpu, err); | ||
2738 | |||
2739 | return 1; | ||
2523 | } | 2740 | } |
2524 | 2741 | ||
2525 | static int cr0_write_interception(struct vcpu_svm *svm) | 2742 | static int cr0_write_interception(struct vcpu_svm *svm) |
@@ -2527,7 +2744,7 @@ static int cr0_write_interception(struct vcpu_svm *svm) | |||
2527 | struct kvm_vcpu *vcpu = &svm->vcpu; | 2744 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2528 | int r; | 2745 | int r; |
2529 | 2746 | ||
2530 | r = emulate_instruction(&svm->vcpu, 0, 0, 0); | 2747 | r = cr_interception(svm); |
2531 | 2748 | ||
2532 | if (svm->nested.vmexit_rip) { | 2749 | if (svm->nested.vmexit_rip) { |
2533 | kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); | 2750 | kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); |
@@ -2536,22 +2753,47 @@ static int cr0_write_interception(struct vcpu_svm *svm) | |||
2536 | svm->nested.vmexit_rip = 0; | 2753 | svm->nested.vmexit_rip = 0; |
2537 | } | 2754 | } |
2538 | 2755 | ||
2539 | return r == EMULATE_DONE; | 2756 | return r; |
2757 | } | ||
2758 | |||
2759 | static int dr_interception(struct vcpu_svm *svm) | ||
2760 | { | ||
2761 | int reg, dr; | ||
2762 | unsigned long val; | ||
2763 | int err; | ||
2764 | |||
2765 | if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) | ||
2766 | return emulate_on_interception(svm); | ||
2767 | |||
2768 | reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; | ||
2769 | dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; | ||
2770 | |||
2771 | if (dr >= 16) { /* mov to DRn */ | ||
2772 | val = kvm_register_read(&svm->vcpu, reg); | ||
2773 | kvm_set_dr(&svm->vcpu, dr - 16, val); | ||
2774 | } else { | ||
2775 | err = kvm_get_dr(&svm->vcpu, dr, &val); | ||
2776 | if (!err) | ||
2777 | kvm_register_write(&svm->vcpu, reg, val); | ||
2778 | } | ||
2779 | |||
2780 | return 1; | ||
2540 | } | 2781 | } |
2541 | 2782 | ||
2542 | static int cr8_write_interception(struct vcpu_svm *svm) | 2783 | static int cr8_write_interception(struct vcpu_svm *svm) |
2543 | { | 2784 | { |
2544 | struct kvm_run *kvm_run = svm->vcpu.run; | 2785 | struct kvm_run *kvm_run = svm->vcpu.run; |
2786 | int r; | ||
2545 | 2787 | ||
2546 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2788 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
2547 | /* instruction emulation calls kvm_set_cr8() */ | 2789 | /* instruction emulation calls kvm_set_cr8() */ |
2548 | emulate_instruction(&svm->vcpu, 0, 0, 0); | 2790 | r = cr_interception(svm); |
2549 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 2791 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
2550 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2792 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
2551 | return 1; | 2793 | return r; |
2552 | } | 2794 | } |
2553 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | 2795 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) |
2554 | return 1; | 2796 | return r; |
2555 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2797 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; |
2556 | return 0; | 2798 | return 0; |
2557 | } | 2799 | } |
@@ -2562,14 +2804,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2562 | 2804 | ||
2563 | switch (ecx) { | 2805 | switch (ecx) { |
2564 | case MSR_IA32_TSC: { | 2806 | case MSR_IA32_TSC: { |
2565 | u64 tsc_offset; | 2807 | struct vmcb *vmcb = get_host_vmcb(svm); |
2566 | 2808 | ||
2567 | if (is_nested(svm)) | 2809 | *data = vmcb->control.tsc_offset + native_read_tsc(); |
2568 | tsc_offset = svm->nested.hsave->control.tsc_offset; | ||
2569 | else | ||
2570 | tsc_offset = svm->vmcb->control.tsc_offset; | ||
2571 | |||
2572 | *data = tsc_offset + native_read_tsc(); | ||
2573 | break; | 2810 | break; |
2574 | } | 2811 | } |
2575 | case MSR_STAR: | 2812 | case MSR_STAR: |
@@ -2714,7 +2951,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2714 | svm->vmcb->save.sysenter_esp = data; | 2951 | svm->vmcb->save.sysenter_esp = data; |
2715 | break; | 2952 | break; |
2716 | case MSR_IA32_DEBUGCTLMSR: | 2953 | case MSR_IA32_DEBUGCTLMSR: |
2717 | if (!svm_has(SVM_FEATURE_LBRV)) { | 2954 | if (!boot_cpu_has(X86_FEATURE_LBRV)) { |
2718 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", | 2955 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", |
2719 | __func__, data); | 2956 | __func__, data); |
2720 | break; | 2957 | break; |
@@ -2723,6 +2960,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2723 | return 1; | 2960 | return 1; |
2724 | 2961 | ||
2725 | svm->vmcb->save.dbgctl = data; | 2962 | svm->vmcb->save.dbgctl = data; |
2963 | mark_dirty(svm->vmcb, VMCB_LBR); | ||
2726 | if (data & (1ULL<<0)) | 2964 | if (data & (1ULL<<0)) |
2727 | svm_enable_lbrv(svm); | 2965 | svm_enable_lbrv(svm); |
2728 | else | 2966 | else |
@@ -2775,6 +3013,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) | |||
2775 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3013 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
2776 | svm_clear_vintr(svm); | 3014 | svm_clear_vintr(svm); |
2777 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 3015 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
3016 | mark_dirty(svm->vmcb, VMCB_INTR); | ||
2778 | /* | 3017 | /* |
2779 | * If the user space waits to inject interrupts, exit as soon as | 3018 | * If the user space waits to inject interrupts, exit as soon as |
2780 | * possible | 3019 | * possible |
@@ -2797,31 +3036,31 @@ static int pause_interception(struct vcpu_svm *svm) | |||
2797 | } | 3036 | } |
2798 | 3037 | ||
2799 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 3038 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
2800 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 3039 | [SVM_EXIT_READ_CR0] = cr_interception, |
2801 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 3040 | [SVM_EXIT_READ_CR3] = cr_interception, |
2802 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 3041 | [SVM_EXIT_READ_CR4] = cr_interception, |
2803 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 3042 | [SVM_EXIT_READ_CR8] = cr_interception, |
2804 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 3043 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2805 | [SVM_EXIT_WRITE_CR0] = cr0_write_interception, | 3044 | [SVM_EXIT_WRITE_CR0] = cr0_write_interception, |
2806 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 3045 | [SVM_EXIT_WRITE_CR3] = cr_interception, |
2807 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 3046 | [SVM_EXIT_WRITE_CR4] = cr_interception, |
2808 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 3047 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
2809 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 3048 | [SVM_EXIT_READ_DR0] = dr_interception, |
2810 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 3049 | [SVM_EXIT_READ_DR1] = dr_interception, |
2811 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 3050 | [SVM_EXIT_READ_DR2] = dr_interception, |
2812 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 3051 | [SVM_EXIT_READ_DR3] = dr_interception, |
2813 | [SVM_EXIT_READ_DR4] = emulate_on_interception, | 3052 | [SVM_EXIT_READ_DR4] = dr_interception, |
2814 | [SVM_EXIT_READ_DR5] = emulate_on_interception, | 3053 | [SVM_EXIT_READ_DR5] = dr_interception, |
2815 | [SVM_EXIT_READ_DR6] = emulate_on_interception, | 3054 | [SVM_EXIT_READ_DR6] = dr_interception, |
2816 | [SVM_EXIT_READ_DR7] = emulate_on_interception, | 3055 | [SVM_EXIT_READ_DR7] = dr_interception, |
2817 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, | 3056 | [SVM_EXIT_WRITE_DR0] = dr_interception, |
2818 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, | 3057 | [SVM_EXIT_WRITE_DR1] = dr_interception, |
2819 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, | 3058 | [SVM_EXIT_WRITE_DR2] = dr_interception, |
2820 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, | 3059 | [SVM_EXIT_WRITE_DR3] = dr_interception, |
2821 | [SVM_EXIT_WRITE_DR4] = emulate_on_interception, | 3060 | [SVM_EXIT_WRITE_DR4] = dr_interception, |
2822 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, | 3061 | [SVM_EXIT_WRITE_DR5] = dr_interception, |
2823 | [SVM_EXIT_WRITE_DR6] = emulate_on_interception, | 3062 | [SVM_EXIT_WRITE_DR6] = dr_interception, |
2824 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, | 3063 | [SVM_EXIT_WRITE_DR7] = dr_interception, |
2825 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 3064 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2826 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 3065 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
2827 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 3066 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
@@ -2854,6 +3093,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2854 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 3093 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
2855 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 3094 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
2856 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 3095 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
3096 | [SVM_EXIT_XSETBV] = xsetbv_interception, | ||
2857 | [SVM_EXIT_NPF] = pf_interception, | 3097 | [SVM_EXIT_NPF] = pf_interception, |
2858 | }; | 3098 | }; |
2859 | 3099 | ||
@@ -2864,10 +3104,10 @@ void dump_vmcb(struct kvm_vcpu *vcpu) | |||
2864 | struct vmcb_save_area *save = &svm->vmcb->save; | 3104 | struct vmcb_save_area *save = &svm->vmcb->save; |
2865 | 3105 | ||
2866 | pr_err("VMCB Control Area:\n"); | 3106 | pr_err("VMCB Control Area:\n"); |
2867 | pr_err("cr_read: %04x\n", control->intercept_cr_read); | 3107 | pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); |
2868 | pr_err("cr_write: %04x\n", control->intercept_cr_write); | 3108 | pr_err("cr_write: %04x\n", control->intercept_cr >> 16); |
2869 | pr_err("dr_read: %04x\n", control->intercept_dr_read); | 3109 | pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); |
2870 | pr_err("dr_write: %04x\n", control->intercept_dr_write); | 3110 | pr_err("dr_write: %04x\n", control->intercept_dr >> 16); |
2871 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | 3111 | pr_err("exceptions: %08x\n", control->intercept_exceptions); |
2872 | pr_err("intercepts: %016llx\n", control->intercept); | 3112 | pr_err("intercepts: %016llx\n", control->intercept); |
2873 | pr_err("pause filter count: %d\n", control->pause_filter_count); | 3113 | pr_err("pause filter count: %d\n", control->pause_filter_count); |
@@ -2950,15 +3190,23 @@ void dump_vmcb(struct kvm_vcpu *vcpu) | |||
2950 | 3190 | ||
2951 | } | 3191 | } |
2952 | 3192 | ||
3193 | static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | ||
3194 | { | ||
3195 | struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; | ||
3196 | |||
3197 | *info1 = control->exit_info_1; | ||
3198 | *info2 = control->exit_info_2; | ||
3199 | } | ||
3200 | |||
2953 | static int handle_exit(struct kvm_vcpu *vcpu) | 3201 | static int handle_exit(struct kvm_vcpu *vcpu) |
2954 | { | 3202 | { |
2955 | struct vcpu_svm *svm = to_svm(vcpu); | 3203 | struct vcpu_svm *svm = to_svm(vcpu); |
2956 | struct kvm_run *kvm_run = vcpu->run; | 3204 | struct kvm_run *kvm_run = vcpu->run; |
2957 | u32 exit_code = svm->vmcb->control.exit_code; | 3205 | u32 exit_code = svm->vmcb->control.exit_code; |
2958 | 3206 | ||
2959 | trace_kvm_exit(exit_code, vcpu); | 3207 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); |
2960 | 3208 | ||
2961 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | 3209 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
2962 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 3210 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
2963 | if (npt_enabled) | 3211 | if (npt_enabled) |
2964 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | 3212 | vcpu->arch.cr3 = svm->vmcb->save.cr3; |
@@ -2970,7 +3218,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2970 | return 1; | 3218 | return 1; |
2971 | } | 3219 | } |
2972 | 3220 | ||
2973 | if (is_nested(svm)) { | 3221 | if (is_guest_mode(vcpu)) { |
2974 | int vmexit; | 3222 | int vmexit; |
2975 | 3223 | ||
2976 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, | 3224 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, |
@@ -3033,7 +3281,6 @@ static void pre_svm_run(struct vcpu_svm *svm) | |||
3033 | 3281 | ||
3034 | struct svm_cpu_data *sd = per_cpu(svm_data, cpu); | 3282 | struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
3035 | 3283 | ||
3036 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | ||
3037 | /* FIXME: handle wraparound of asid_generation */ | 3284 | /* FIXME: handle wraparound of asid_generation */ |
3038 | if (svm->asid_generation != sd->asid_generation) | 3285 | if (svm->asid_generation != sd->asid_generation) |
3039 | new_asid(svm, sd); | 3286 | new_asid(svm, sd); |
@@ -3045,7 +3292,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
3045 | 3292 | ||
3046 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 3293 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
3047 | vcpu->arch.hflags |= HF_NMI_MASK; | 3294 | vcpu->arch.hflags |= HF_NMI_MASK; |
3048 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); | 3295 | set_intercept(svm, INTERCEPT_IRET); |
3049 | ++vcpu->stat.nmi_injections; | 3296 | ++vcpu->stat.nmi_injections; |
3050 | } | 3297 | } |
3051 | 3298 | ||
@@ -3058,6 +3305,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
3058 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 3305 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
3059 | control->int_ctl |= V_IRQ_MASK | | 3306 | control->int_ctl |= V_IRQ_MASK | |
3060 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); | 3307 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); |
3308 | mark_dirty(svm->vmcb, VMCB_INTR); | ||
3061 | } | 3309 | } |
3062 | 3310 | ||
3063 | static void svm_set_irq(struct kvm_vcpu *vcpu) | 3311 | static void svm_set_irq(struct kvm_vcpu *vcpu) |
@@ -3077,14 +3325,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3077 | { | 3325 | { |
3078 | struct vcpu_svm *svm = to_svm(vcpu); | 3326 | struct vcpu_svm *svm = to_svm(vcpu); |
3079 | 3327 | ||
3080 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3328 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3081 | return; | 3329 | return; |
3082 | 3330 | ||
3083 | if (irr == -1) | 3331 | if (irr == -1) |
3084 | return; | 3332 | return; |
3085 | 3333 | ||
3086 | if (tpr >= irr) | 3334 | if (tpr >= irr) |
3087 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; | 3335 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
3088 | } | 3336 | } |
3089 | 3337 | ||
3090 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3338 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
@@ -3112,10 +3360,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
3112 | 3360 | ||
3113 | if (masked) { | 3361 | if (masked) { |
3114 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | 3362 | svm->vcpu.arch.hflags |= HF_NMI_MASK; |
3115 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); | 3363 | set_intercept(svm, INTERCEPT_IRET); |
3116 | } else { | 3364 | } else { |
3117 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | 3365 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; |
3118 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); | 3366 | clr_intercept(svm, INTERCEPT_IRET); |
3119 | } | 3367 | } |
3120 | } | 3368 | } |
3121 | 3369 | ||
@@ -3131,7 +3379,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3131 | 3379 | ||
3132 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | 3380 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); |
3133 | 3381 | ||
3134 | if (is_nested(svm)) | 3382 | if (is_guest_mode(vcpu)) |
3135 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | 3383 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); |
3136 | 3384 | ||
3137 | return ret; | 3385 | return ret; |
@@ -3177,7 +3425,12 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
3177 | 3425 | ||
3178 | static void svm_flush_tlb(struct kvm_vcpu *vcpu) | 3426 | static void svm_flush_tlb(struct kvm_vcpu *vcpu) |
3179 | { | 3427 | { |
3180 | force_new_asid(vcpu); | 3428 | struct vcpu_svm *svm = to_svm(vcpu); |
3429 | |||
3430 | if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) | ||
3431 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; | ||
3432 | else | ||
3433 | svm->asid_generation--; | ||
3181 | } | 3434 | } |
3182 | 3435 | ||
3183 | static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) | 3436 | static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) |
@@ -3188,10 +3441,10 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
3188 | { | 3441 | { |
3189 | struct vcpu_svm *svm = to_svm(vcpu); | 3442 | struct vcpu_svm *svm = to_svm(vcpu); |
3190 | 3443 | ||
3191 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3444 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3192 | return; | 3445 | return; |
3193 | 3446 | ||
3194 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 3447 | if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { |
3195 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 3448 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
3196 | kvm_set_cr8(vcpu, cr8); | 3449 | kvm_set_cr8(vcpu, cr8); |
3197 | } | 3450 | } |
@@ -3202,7 +3455,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
3202 | struct vcpu_svm *svm = to_svm(vcpu); | 3455 | struct vcpu_svm *svm = to_svm(vcpu); |
3203 | u64 cr8; | 3456 | u64 cr8; |
3204 | 3457 | ||
3205 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3458 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3206 | return; | 3459 | return; |
3207 | 3460 | ||
3208 | cr8 = kvm_get_cr8(vcpu); | 3461 | cr8 = kvm_get_cr8(vcpu); |
@@ -3289,9 +3542,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) | |||
3289 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) | 3542 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
3290 | { | 3543 | { |
3291 | struct vcpu_svm *svm = to_svm(vcpu); | 3544 | struct vcpu_svm *svm = to_svm(vcpu); |
3292 | u16 fs_selector; | ||
3293 | u16 gs_selector; | ||
3294 | u16 ldt_selector; | ||
3295 | 3545 | ||
3296 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3546 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; |
3297 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | 3547 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; |
@@ -3308,10 +3558,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3308 | 3558 | ||
3309 | sync_lapic_to_cr8(vcpu); | 3559 | sync_lapic_to_cr8(vcpu); |
3310 | 3560 | ||
3311 | save_host_msrs(vcpu); | ||
3312 | savesegment(fs, fs_selector); | ||
3313 | savesegment(gs, gs_selector); | ||
3314 | ldt_selector = kvm_read_ldt(); | ||
3315 | svm->vmcb->save.cr2 = vcpu->arch.cr2; | 3561 | svm->vmcb->save.cr2 = vcpu->arch.cr2; |
3316 | 3562 | ||
3317 | clgi(); | 3563 | clgi(); |
@@ -3389,20 +3635,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3389 | #endif | 3635 | #endif |
3390 | ); | 3636 | ); |
3391 | 3637 | ||
3392 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | ||
3393 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
3394 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
3395 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | ||
3396 | |||
3397 | load_host_msrs(vcpu); | ||
3398 | loadsegment(fs, fs_selector); | ||
3399 | #ifdef CONFIG_X86_64 | 3638 | #ifdef CONFIG_X86_64 |
3400 | load_gs_index(gs_selector); | 3639 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); |
3401 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
3402 | #else | 3640 | #else |
3403 | loadsegment(gs, gs_selector); | 3641 | loadsegment(fs, svm->host.fs); |
3404 | #endif | 3642 | #endif |
3405 | kvm_load_ldt(ldt_selector); | ||
3406 | 3643 | ||
3407 | reload_tss(vcpu); | 3644 | reload_tss(vcpu); |
3408 | 3645 | ||
@@ -3410,10 +3647,21 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3410 | 3647 | ||
3411 | stgi(); | 3648 | stgi(); |
3412 | 3649 | ||
3650 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | ||
3651 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
3652 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
3653 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | ||
3654 | |||
3413 | sync_cr8_to_lapic(vcpu); | 3655 | sync_cr8_to_lapic(vcpu); |
3414 | 3656 | ||
3415 | svm->next_rip = 0; | 3657 | svm->next_rip = 0; |
3416 | 3658 | ||
3659 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | ||
3660 | |||
3661 | /* if exit due to PF check for async PF */ | ||
3662 | if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) | ||
3663 | svm->apf_reason = kvm_read_and_reset_pf_reason(); | ||
3664 | |||
3417 | if (npt_enabled) { | 3665 | if (npt_enabled) { |
3418 | vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); | 3666 | vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); |
3419 | vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); | 3667 | vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); |
@@ -3426,6 +3674,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3426 | if (unlikely(svm->vmcb->control.exit_code == | 3674 | if (unlikely(svm->vmcb->control.exit_code == |
3427 | SVM_EXIT_EXCP_BASE + MC_VECTOR)) | 3675 | SVM_EXIT_EXCP_BASE + MC_VECTOR)) |
3428 | svm_handle_mce(svm); | 3676 | svm_handle_mce(svm); |
3677 | |||
3678 | mark_all_clean(svm->vmcb); | ||
3429 | } | 3679 | } |
3430 | 3680 | ||
3431 | #undef R | 3681 | #undef R |
@@ -3435,7 +3685,8 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
3435 | struct vcpu_svm *svm = to_svm(vcpu); | 3685 | struct vcpu_svm *svm = to_svm(vcpu); |
3436 | 3686 | ||
3437 | svm->vmcb->save.cr3 = root; | 3687 | svm->vmcb->save.cr3 = root; |
3438 | force_new_asid(vcpu); | 3688 | mark_dirty(svm->vmcb, VMCB_CR); |
3689 | svm_flush_tlb(vcpu); | ||
3439 | } | 3690 | } |
3440 | 3691 | ||
3441 | static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 3692 | static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
@@ -3443,11 +3694,13 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
3443 | struct vcpu_svm *svm = to_svm(vcpu); | 3694 | struct vcpu_svm *svm = to_svm(vcpu); |
3444 | 3695 | ||
3445 | svm->vmcb->control.nested_cr3 = root; | 3696 | svm->vmcb->control.nested_cr3 = root; |
3697 | mark_dirty(svm->vmcb, VMCB_NPT); | ||
3446 | 3698 | ||
3447 | /* Also sync guest cr3 here in case we live migrate */ | 3699 | /* Also sync guest cr3 here in case we live migrate */ |
3448 | svm->vmcb->save.cr3 = vcpu->arch.cr3; | 3700 | svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); |
3701 | mark_dirty(svm->vmcb, VMCB_CR); | ||
3449 | 3702 | ||
3450 | force_new_asid(vcpu); | 3703 | svm_flush_tlb(vcpu); |
3451 | } | 3704 | } |
3452 | 3705 | ||
3453 | static int is_disabled(void) | 3706 | static int is_disabled(void) |
@@ -3507,7 +3760,7 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
3507 | additional features */ | 3760 | additional features */ |
3508 | 3761 | ||
3509 | /* Support next_rip if host supports it */ | 3762 | /* Support next_rip if host supports it */ |
3510 | if (svm_has(SVM_FEATURE_NRIP)) | 3763 | if (boot_cpu_has(X86_FEATURE_NRIPS)) |
3511 | entry->edx |= SVM_FEATURE_NRIP; | 3764 | entry->edx |= SVM_FEATURE_NRIP; |
3512 | 3765 | ||
3513 | /* Support NPT for the guest if enabled */ | 3766 | /* Support NPT for the guest if enabled */ |
@@ -3567,6 +3820,7 @@ static const struct trace_print_flags svm_exit_reasons_str[] = { | |||
3567 | { SVM_EXIT_WBINVD, "wbinvd" }, | 3820 | { SVM_EXIT_WBINVD, "wbinvd" }, |
3568 | { SVM_EXIT_MONITOR, "monitor" }, | 3821 | { SVM_EXIT_MONITOR, "monitor" }, |
3569 | { SVM_EXIT_MWAIT, "mwait" }, | 3822 | { SVM_EXIT_MWAIT, "mwait" }, |
3823 | { SVM_EXIT_XSETBV, "xsetbv" }, | ||
3570 | { SVM_EXIT_NPF, "npf" }, | 3824 | { SVM_EXIT_NPF, "npf" }, |
3571 | { -1, NULL } | 3825 | { -1, NULL } |
3572 | }; | 3826 | }; |
@@ -3590,9 +3844,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
3590 | { | 3844 | { |
3591 | struct vcpu_svm *svm = to_svm(vcpu); | 3845 | struct vcpu_svm *svm = to_svm(vcpu); |
3592 | 3846 | ||
3593 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3847 | set_exception_intercept(svm, NM_VECTOR); |
3594 | if (is_nested(svm)) | ||
3595 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
3596 | update_cr0_intercept(svm); | 3848 | update_cr0_intercept(svm); |
3597 | } | 3849 | } |
3598 | 3850 | ||
@@ -3623,6 +3875,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3623 | .get_cpl = svm_get_cpl, | 3875 | .get_cpl = svm_get_cpl, |
3624 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, | 3876 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
3625 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, | 3877 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, |
3878 | .decache_cr3 = svm_decache_cr3, | ||
3626 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 3879 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
3627 | .set_cr0 = svm_set_cr0, | 3880 | .set_cr0 = svm_set_cr0, |
3628 | .set_cr3 = svm_set_cr3, | 3881 | .set_cr3 = svm_set_cr3, |
@@ -3663,7 +3916,9 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3663 | .get_tdp_level = get_npt_level, | 3916 | .get_tdp_level = get_npt_level, |
3664 | .get_mt_mask = svm_get_mt_mask, | 3917 | .get_mt_mask = svm_get_mt_mask, |
3665 | 3918 | ||
3919 | .get_exit_info = svm_get_exit_info, | ||
3666 | .exit_reasons_str = svm_exit_reasons_str, | 3920 | .exit_reasons_str = svm_exit_reasons_str, |
3921 | |||
3667 | .get_lpage_level = svm_get_lpage_level, | 3922 | .get_lpage_level = svm_get_lpage_level, |
3668 | 3923 | ||
3669 | .cpuid_update = svm_cpuid_update, | 3924 | .cpuid_update = svm_cpuid_update, |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a6544b8e7c0f..1357d7cf4ec8 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -178,27 +178,36 @@ TRACE_EVENT(kvm_apic, | |||
178 | #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) | 178 | #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) |
179 | #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) | 179 | #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) |
180 | 180 | ||
181 | #define KVM_ISA_VMX 1 | ||
182 | #define KVM_ISA_SVM 2 | ||
183 | |||
181 | /* | 184 | /* |
182 | * Tracepoint for kvm guest exit: | 185 | * Tracepoint for kvm guest exit: |
183 | */ | 186 | */ |
184 | TRACE_EVENT(kvm_exit, | 187 | TRACE_EVENT(kvm_exit, |
185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), | 188 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), |
186 | TP_ARGS(exit_reason, vcpu), | 189 | TP_ARGS(exit_reason, vcpu, isa), |
187 | 190 | ||
188 | TP_STRUCT__entry( | 191 | TP_STRUCT__entry( |
189 | __field( unsigned int, exit_reason ) | 192 | __field( unsigned int, exit_reason ) |
190 | __field( unsigned long, guest_rip ) | 193 | __field( unsigned long, guest_rip ) |
194 | __field( u32, isa ) | ||
195 | __field( u64, info1 ) | ||
196 | __field( u64, info2 ) | ||
191 | ), | 197 | ), |
192 | 198 | ||
193 | TP_fast_assign( | 199 | TP_fast_assign( |
194 | __entry->exit_reason = exit_reason; | 200 | __entry->exit_reason = exit_reason; |
195 | __entry->guest_rip = kvm_rip_read(vcpu); | 201 | __entry->guest_rip = kvm_rip_read(vcpu); |
202 | __entry->isa = isa; | ||
203 | kvm_x86_ops->get_exit_info(vcpu, &__entry->info1, | ||
204 | &__entry->info2); | ||
196 | ), | 205 | ), |
197 | 206 | ||
198 | TP_printk("reason %s rip 0x%lx", | 207 | TP_printk("reason %s rip 0x%lx info %llx %llx", |
199 | ftrace_print_symbols_seq(p, __entry->exit_reason, | 208 | ftrace_print_symbols_seq(p, __entry->exit_reason, |
200 | kvm_x86_ops->exit_reasons_str), | 209 | kvm_x86_ops->exit_reasons_str), |
201 | __entry->guest_rip) | 210 | __entry->guest_rip, __entry->info1, __entry->info2) |
202 | ); | 211 | ); |
203 | 212 | ||
204 | /* | 213 | /* |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8da0e45ff7c9..bf89ec2cfb82 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); | |||
69 | static int __read_mostly vmm_exclusive = 1; | 69 | static int __read_mostly vmm_exclusive = 1; |
70 | module_param(vmm_exclusive, bool, S_IRUGO); | 70 | module_param(vmm_exclusive, bool, S_IRUGO); |
71 | 71 | ||
72 | static int __read_mostly yield_on_hlt = 1; | ||
73 | module_param(yield_on_hlt, bool, S_IRUGO); | ||
74 | |||
72 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 75 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
73 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 76 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) |
74 | #define KVM_GUEST_CR0_MASK \ | 77 | #define KVM_GUEST_CR0_MASK \ |
@@ -177,6 +180,7 @@ static int init_rmode(struct kvm *kvm); | |||
177 | static u64 construct_eptp(unsigned long root_hpa); | 180 | static u64 construct_eptp(unsigned long root_hpa); |
178 | static void kvm_cpu_vmxon(u64 addr); | 181 | static void kvm_cpu_vmxon(u64 addr); |
179 | static void kvm_cpu_vmxoff(void); | 182 | static void kvm_cpu_vmxoff(void); |
183 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
180 | 184 | ||
181 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
182 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -188,6 +192,8 @@ static unsigned long *vmx_io_bitmap_b; | |||
188 | static unsigned long *vmx_msr_bitmap_legacy; | 192 | static unsigned long *vmx_msr_bitmap_legacy; |
189 | static unsigned long *vmx_msr_bitmap_longmode; | 193 | static unsigned long *vmx_msr_bitmap_longmode; |
190 | 194 | ||
195 | static bool cpu_has_load_ia32_efer; | ||
196 | |||
191 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); | 197 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); |
192 | static DEFINE_SPINLOCK(vmx_vpid_lock); | 198 | static DEFINE_SPINLOCK(vmx_vpid_lock); |
193 | 199 | ||
@@ -472,7 +478,7 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
472 | u8 error; | 478 | u8 error; |
473 | 479 | ||
474 | asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" | 480 | asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" |
475 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 481 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) |
476 | : "cc", "memory"); | 482 | : "cc", "memory"); |
477 | if (error) | 483 | if (error) |
478 | printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", | 484 | printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", |
@@ -485,7 +491,7 @@ static void vmcs_load(struct vmcs *vmcs) | |||
485 | u8 error; | 491 | u8 error; |
486 | 492 | ||
487 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | 493 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" |
488 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 494 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) |
489 | : "cc", "memory"); | 495 | : "cc", "memory"); |
490 | if (error) | 496 | if (error) |
491 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | 497 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", |
@@ -565,10 +571,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | |||
565 | 571 | ||
566 | static unsigned long vmcs_readl(unsigned long field) | 572 | static unsigned long vmcs_readl(unsigned long field) |
567 | { | 573 | { |
568 | unsigned long value; | 574 | unsigned long value = 0; |
569 | 575 | ||
570 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) | 576 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) |
571 | : "=a"(value) : "d"(field) : "cc"); | 577 | : "+a"(value) : "d"(field) : "cc"); |
572 | return value; | 578 | return value; |
573 | } | 579 | } |
574 | 580 | ||
@@ -661,6 +667,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
661 | unsigned i; | 667 | unsigned i; |
662 | struct msr_autoload *m = &vmx->msr_autoload; | 668 | struct msr_autoload *m = &vmx->msr_autoload; |
663 | 669 | ||
670 | if (msr == MSR_EFER && cpu_has_load_ia32_efer) { | ||
671 | vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); | ||
672 | vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); | ||
673 | return; | ||
674 | } | ||
675 | |||
664 | for (i = 0; i < m->nr; ++i) | 676 | for (i = 0; i < m->nr; ++i) |
665 | if (m->guest[i].index == msr) | 677 | if (m->guest[i].index == msr) |
666 | break; | 678 | break; |
@@ -680,6 +692,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
680 | unsigned i; | 692 | unsigned i; |
681 | struct msr_autoload *m = &vmx->msr_autoload; | 693 | struct msr_autoload *m = &vmx->msr_autoload; |
682 | 694 | ||
695 | if (msr == MSR_EFER && cpu_has_load_ia32_efer) { | ||
696 | vmcs_write64(GUEST_IA32_EFER, guest_val); | ||
697 | vmcs_write64(HOST_IA32_EFER, host_val); | ||
698 | vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); | ||
699 | vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); | ||
700 | return; | ||
701 | } | ||
702 | |||
683 | for (i = 0; i < m->nr; ++i) | 703 | for (i = 0; i < m->nr; ++i) |
684 | if (m->guest[i].index == msr) | 704 | if (m->guest[i].index == msr) |
685 | break; | 705 | break; |
@@ -821,10 +841,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
821 | #endif | 841 | #endif |
822 | 842 | ||
823 | #ifdef CONFIG_X86_64 | 843 | #ifdef CONFIG_X86_64 |
824 | if (is_long_mode(&vmx->vcpu)) { | 844 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
825 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 845 | if (is_long_mode(&vmx->vcpu)) |
826 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 846 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
827 | } | ||
828 | #endif | 847 | #endif |
829 | for (i = 0; i < vmx->save_nmsrs; ++i) | 848 | for (i = 0; i < vmx->save_nmsrs; ++i) |
830 | kvm_set_shared_msr(vmx->guest_msrs[i].index, | 849 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
@@ -839,23 +858,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
839 | 858 | ||
840 | ++vmx->vcpu.stat.host_state_reload; | 859 | ++vmx->vcpu.stat.host_state_reload; |
841 | vmx->host_state.loaded = 0; | 860 | vmx->host_state.loaded = 0; |
842 | if (vmx->host_state.fs_reload_needed) | 861 | #ifdef CONFIG_X86_64 |
843 | loadsegment(fs, vmx->host_state.fs_sel); | 862 | if (is_long_mode(&vmx->vcpu)) |
863 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | ||
864 | #endif | ||
844 | if (vmx->host_state.gs_ldt_reload_needed) { | 865 | if (vmx->host_state.gs_ldt_reload_needed) { |
845 | kvm_load_ldt(vmx->host_state.ldt_sel); | 866 | kvm_load_ldt(vmx->host_state.ldt_sel); |
846 | #ifdef CONFIG_X86_64 | 867 | #ifdef CONFIG_X86_64 |
847 | load_gs_index(vmx->host_state.gs_sel); | 868 | load_gs_index(vmx->host_state.gs_sel); |
848 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
849 | #else | 869 | #else |
850 | loadsegment(gs, vmx->host_state.gs_sel); | 870 | loadsegment(gs, vmx->host_state.gs_sel); |
851 | #endif | 871 | #endif |
852 | } | 872 | } |
873 | if (vmx->host_state.fs_reload_needed) | ||
874 | loadsegment(fs, vmx->host_state.fs_sel); | ||
853 | reload_tss(); | 875 | reload_tss(); |
854 | #ifdef CONFIG_X86_64 | 876 | #ifdef CONFIG_X86_64 |
855 | if (is_long_mode(&vmx->vcpu)) { | 877 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
856 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | ||
857 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
858 | } | ||
859 | #endif | 878 | #endif |
860 | if (current_thread_info()->status & TS_USEDFPU) | 879 | if (current_thread_info()->status & TS_USEDFPU) |
861 | clts(); | 880 | clts(); |
@@ -1010,6 +1029,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
1010 | vmx_set_interrupt_shadow(vcpu, 0); | 1029 | vmx_set_interrupt_shadow(vcpu, 0); |
1011 | } | 1030 | } |
1012 | 1031 | ||
1032 | static void vmx_clear_hlt(struct kvm_vcpu *vcpu) | ||
1033 | { | ||
1034 | /* Ensure that we clear the HLT state in the VMCS. We don't need to | ||
1035 | * explicitly skip the instruction because if the HLT state is set, then | ||
1036 | * the instruction is already executing and RIP has already been | ||
1037 | * advanced. */ | ||
1038 | if (!yield_on_hlt && | ||
1039 | vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) | ||
1040 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); | ||
1041 | } | ||
1042 | |||
1013 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 1043 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
1014 | bool has_error_code, u32 error_code, | 1044 | bool has_error_code, u32 error_code, |
1015 | bool reinject) | 1045 | bool reinject) |
@@ -1036,6 +1066,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1036 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 1066 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
1037 | 1067 | ||
1038 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 1068 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
1069 | vmx_clear_hlt(vcpu); | ||
1039 | } | 1070 | } |
1040 | 1071 | ||
1041 | static bool vmx_rdtscp_supported(void) | 1072 | static bool vmx_rdtscp_supported(void) |
@@ -1306,8 +1337,11 @@ static __init int vmx_disabled_by_bios(void) | |||
1306 | && tboot_enabled()) | 1337 | && tboot_enabled()) |
1307 | return 1; | 1338 | return 1; |
1308 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | 1339 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) |
1309 | && !tboot_enabled()) | 1340 | && !tboot_enabled()) { |
1341 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " | ||
1342 | " activate TXT before enabling KVM\n"); | ||
1310 | return 1; | 1343 | return 1; |
1344 | } | ||
1311 | } | 1345 | } |
1312 | 1346 | ||
1313 | return 0; | 1347 | return 0; |
@@ -1401,6 +1435,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | |||
1401 | return 0; | 1435 | return 0; |
1402 | } | 1436 | } |
1403 | 1437 | ||
1438 | static __init bool allow_1_setting(u32 msr, u32 ctl) | ||
1439 | { | ||
1440 | u32 vmx_msr_low, vmx_msr_high; | ||
1441 | |||
1442 | rdmsr(msr, vmx_msr_low, vmx_msr_high); | ||
1443 | return vmx_msr_high & ctl; | ||
1444 | } | ||
1445 | |||
1404 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | 1446 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) |
1405 | { | 1447 | { |
1406 | u32 vmx_msr_low, vmx_msr_high; | 1448 | u32 vmx_msr_low, vmx_msr_high; |
@@ -1417,7 +1459,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1417 | &_pin_based_exec_control) < 0) | 1459 | &_pin_based_exec_control) < 0) |
1418 | return -EIO; | 1460 | return -EIO; |
1419 | 1461 | ||
1420 | min = CPU_BASED_HLT_EXITING | | 1462 | min = |
1421 | #ifdef CONFIG_X86_64 | 1463 | #ifdef CONFIG_X86_64 |
1422 | CPU_BASED_CR8_LOAD_EXITING | | 1464 | CPU_BASED_CR8_LOAD_EXITING | |
1423 | CPU_BASED_CR8_STORE_EXITING | | 1465 | CPU_BASED_CR8_STORE_EXITING | |
@@ -1430,6 +1472,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1430 | CPU_BASED_MWAIT_EXITING | | 1472 | CPU_BASED_MWAIT_EXITING | |
1431 | CPU_BASED_MONITOR_EXITING | | 1473 | CPU_BASED_MONITOR_EXITING | |
1432 | CPU_BASED_INVLPG_EXITING; | 1474 | CPU_BASED_INVLPG_EXITING; |
1475 | |||
1476 | if (yield_on_hlt) | ||
1477 | min |= CPU_BASED_HLT_EXITING; | ||
1478 | |||
1433 | opt = CPU_BASED_TPR_SHADOW | | 1479 | opt = CPU_BASED_TPR_SHADOW | |
1434 | CPU_BASED_USE_MSR_BITMAPS | | 1480 | CPU_BASED_USE_MSR_BITMAPS | |
1435 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1481 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -1511,6 +1557,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1511 | vmcs_conf->vmexit_ctrl = _vmexit_control; | 1557 | vmcs_conf->vmexit_ctrl = _vmexit_control; |
1512 | vmcs_conf->vmentry_ctrl = _vmentry_control; | 1558 | vmcs_conf->vmentry_ctrl = _vmentry_control; |
1513 | 1559 | ||
1560 | cpu_has_load_ia32_efer = | ||
1561 | allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, | ||
1562 | VM_ENTRY_LOAD_IA32_EFER) | ||
1563 | && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, | ||
1564 | VM_EXIT_LOAD_IA32_EFER); | ||
1565 | |||
1514 | return 0; | 1566 | return 0; |
1515 | } | 1567 | } |
1516 | 1568 | ||
@@ -1684,9 +1736,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) | |||
1684 | save->limit = vmcs_read32(sf->limit); | 1736 | save->limit = vmcs_read32(sf->limit); |
1685 | save->ar = vmcs_read32(sf->ar_bytes); | 1737 | save->ar = vmcs_read32(sf->ar_bytes); |
1686 | vmcs_write16(sf->selector, save->base >> 4); | 1738 | vmcs_write16(sf->selector, save->base >> 4); |
1687 | vmcs_write32(sf->base, save->base & 0xfffff); | 1739 | vmcs_write32(sf->base, save->base & 0xffff0); |
1688 | vmcs_write32(sf->limit, 0xffff); | 1740 | vmcs_write32(sf->limit, 0xffff); |
1689 | vmcs_write32(sf->ar_bytes, 0xf3); | 1741 | vmcs_write32(sf->ar_bytes, 0xf3); |
1742 | if (save->base & 0xf) | ||
1743 | printk_once(KERN_WARNING "kvm: segment base is not paragraph" | ||
1744 | " aligned when entering protected mode (seg=%d)", | ||
1745 | seg); | ||
1690 | } | 1746 | } |
1691 | 1747 | ||
1692 | static void enter_rmode(struct kvm_vcpu *vcpu) | 1748 | static void enter_rmode(struct kvm_vcpu *vcpu) |
@@ -1815,6 +1871,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | |||
1815 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | 1871 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; |
1816 | } | 1872 | } |
1817 | 1873 | ||
1874 | static void vmx_decache_cr3(struct kvm_vcpu *vcpu) | ||
1875 | { | ||
1876 | if (enable_ept && is_paging(vcpu)) | ||
1877 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
1878 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
1879 | } | ||
1880 | |||
1818 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1881 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1819 | { | 1882 | { |
1820 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; | 1883 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; |
@@ -1858,6 +1921,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1858 | unsigned long cr0, | 1921 | unsigned long cr0, |
1859 | struct kvm_vcpu *vcpu) | 1922 | struct kvm_vcpu *vcpu) |
1860 | { | 1923 | { |
1924 | vmx_decache_cr3(vcpu); | ||
1861 | if (!(cr0 & X86_CR0_PG)) { | 1925 | if (!(cr0 & X86_CR0_PG)) { |
1862 | /* From paging/starting to nonpaging */ | 1926 | /* From paging/starting to nonpaging */ |
1863 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1927 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1938,7 +2002,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1938 | if (enable_ept) { | 2002 | if (enable_ept) { |
1939 | eptp = construct_eptp(cr3); | 2003 | eptp = construct_eptp(cr3); |
1940 | vmcs_write64(EPT_POINTER, eptp); | 2004 | vmcs_write64(EPT_POINTER, eptp); |
1941 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 2005 | guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : |
1942 | vcpu->kvm->arch.ept_identity_map_addr; | 2006 | vcpu->kvm->arch.ept_identity_map_addr; |
1943 | ept_load_pdptrs(vcpu); | 2007 | ept_load_pdptrs(vcpu); |
1944 | } | 2008 | } |
@@ -2726,7 +2790,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2726 | vmcs_writel(GUEST_IDTR_BASE, 0); | 2790 | vmcs_writel(GUEST_IDTR_BASE, 0); |
2727 | vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); | 2791 | vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); |
2728 | 2792 | ||
2729 | vmcs_write32(GUEST_ACTIVITY_STATE, 0); | 2793 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); |
2730 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); | 2794 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); |
2731 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); | 2795 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); |
2732 | 2796 | ||
@@ -2788,6 +2852,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2788 | return; | 2852 | return; |
2789 | } | 2853 | } |
2790 | 2854 | ||
2855 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | ||
2856 | enable_irq_window(vcpu); | ||
2857 | return; | ||
2858 | } | ||
2791 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2859 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
2792 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 2860 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; |
2793 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2861 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
@@ -2815,6 +2883,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
2815 | } else | 2883 | } else |
2816 | intr |= INTR_TYPE_EXT_INTR; | 2884 | intr |= INTR_TYPE_EXT_INTR; |
2817 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | 2885 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); |
2886 | vmx_clear_hlt(vcpu); | ||
2818 | } | 2887 | } |
2819 | 2888 | ||
2820 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 2889 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -2842,6 +2911,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2842 | } | 2911 | } |
2843 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2912 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
2844 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2913 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
2914 | vmx_clear_hlt(vcpu); | ||
2845 | } | 2915 | } |
2846 | 2916 | ||
2847 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 2917 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
@@ -2850,7 +2920,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2850 | return 0; | 2920 | return 0; |
2851 | 2921 | ||
2852 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2922 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2853 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); | 2923 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
2924 | | GUEST_INTR_STATE_NMI)); | ||
2854 | } | 2925 | } |
2855 | 2926 | ||
2856 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 2927 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -2911,7 +2982,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2911 | * Cause the #SS fault with 0 error code in VM86 mode. | 2982 | * Cause the #SS fault with 0 error code in VM86 mode. |
2912 | */ | 2983 | */ |
2913 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2984 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2914 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) | 2985 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) |
2915 | return 1; | 2986 | return 1; |
2916 | /* | 2987 | /* |
2917 | * Forward all other exceptions that are valid in real mode. | 2988 | * Forward all other exceptions that are valid in real mode. |
@@ -3008,7 +3079,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3008 | } | 3079 | } |
3009 | 3080 | ||
3010 | if (is_invalid_opcode(intr_info)) { | 3081 | if (is_invalid_opcode(intr_info)) { |
3011 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); | 3082 | er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); |
3012 | if (er != EMULATE_DONE) | 3083 | if (er != EMULATE_DONE) |
3013 | kvm_queue_exception(vcpu, UD_VECTOR); | 3084 | kvm_queue_exception(vcpu, UD_VECTOR); |
3014 | return 1; | 3085 | return 1; |
@@ -3027,7 +3098,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3027 | 3098 | ||
3028 | if (kvm_event_needs_reinjection(vcpu)) | 3099 | if (kvm_event_needs_reinjection(vcpu)) |
3029 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 3100 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
3030 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 3101 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); |
3031 | } | 3102 | } |
3032 | 3103 | ||
3033 | if (vmx->rmode.vm86_active && | 3104 | if (vmx->rmode.vm86_active && |
@@ -3099,7 +3170,7 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
3099 | ++vcpu->stat.io_exits; | 3170 | ++vcpu->stat.io_exits; |
3100 | 3171 | ||
3101 | if (string || in) | 3172 | if (string || in) |
3102 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 3173 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
3103 | 3174 | ||
3104 | port = exit_qualification >> 16; | 3175 | port = exit_qualification >> 16; |
3105 | size = (exit_qualification & 7) + 1; | 3176 | size = (exit_qualification & 7) + 1; |
@@ -3119,14 +3190,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
3119 | hypercall[2] = 0xc1; | 3190 | hypercall[2] = 0xc1; |
3120 | } | 3191 | } |
3121 | 3192 | ||
3122 | static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) | ||
3123 | { | ||
3124 | if (err) | ||
3125 | kvm_inject_gp(vcpu, 0); | ||
3126 | else | ||
3127 | skip_emulated_instruction(vcpu); | ||
3128 | } | ||
3129 | |||
3130 | static int handle_cr(struct kvm_vcpu *vcpu) | 3193 | static int handle_cr(struct kvm_vcpu *vcpu) |
3131 | { | 3194 | { |
3132 | unsigned long exit_qualification, val; | 3195 | unsigned long exit_qualification, val; |
@@ -3144,21 +3207,21 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3144 | switch (cr) { | 3207 | switch (cr) { |
3145 | case 0: | 3208 | case 0: |
3146 | err = kvm_set_cr0(vcpu, val); | 3209 | err = kvm_set_cr0(vcpu, val); |
3147 | complete_insn_gp(vcpu, err); | 3210 | kvm_complete_insn_gp(vcpu, err); |
3148 | return 1; | 3211 | return 1; |
3149 | case 3: | 3212 | case 3: |
3150 | err = kvm_set_cr3(vcpu, val); | 3213 | err = kvm_set_cr3(vcpu, val); |
3151 | complete_insn_gp(vcpu, err); | 3214 | kvm_complete_insn_gp(vcpu, err); |
3152 | return 1; | 3215 | return 1; |
3153 | case 4: | 3216 | case 4: |
3154 | err = kvm_set_cr4(vcpu, val); | 3217 | err = kvm_set_cr4(vcpu, val); |
3155 | complete_insn_gp(vcpu, err); | 3218 | kvm_complete_insn_gp(vcpu, err); |
3156 | return 1; | 3219 | return 1; |
3157 | case 8: { | 3220 | case 8: { |
3158 | u8 cr8_prev = kvm_get_cr8(vcpu); | 3221 | u8 cr8_prev = kvm_get_cr8(vcpu); |
3159 | u8 cr8 = kvm_register_read(vcpu, reg); | 3222 | u8 cr8 = kvm_register_read(vcpu, reg); |
3160 | kvm_set_cr8(vcpu, cr8); | 3223 | err = kvm_set_cr8(vcpu, cr8); |
3161 | skip_emulated_instruction(vcpu); | 3224 | kvm_complete_insn_gp(vcpu, err); |
3162 | if (irqchip_in_kernel(vcpu->kvm)) | 3225 | if (irqchip_in_kernel(vcpu->kvm)) |
3163 | return 1; | 3226 | return 1; |
3164 | if (cr8_prev <= cr8) | 3227 | if (cr8_prev <= cr8) |
@@ -3177,8 +3240,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3177 | case 1: /*mov from cr*/ | 3240 | case 1: /*mov from cr*/ |
3178 | switch (cr) { | 3241 | switch (cr) { |
3179 | case 3: | 3242 | case 3: |
3180 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); | 3243 | val = kvm_read_cr3(vcpu); |
3181 | trace_kvm_cr_read(cr, vcpu->arch.cr3); | 3244 | kvm_register_write(vcpu, reg, val); |
3245 | trace_kvm_cr_read(cr, val); | ||
3182 | skip_emulated_instruction(vcpu); | 3246 | skip_emulated_instruction(vcpu); |
3183 | return 1; | 3247 | return 1; |
3184 | case 8: | 3248 | case 8: |
@@ -3350,6 +3414,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu) | |||
3350 | return 1; | 3414 | return 1; |
3351 | } | 3415 | } |
3352 | 3416 | ||
3417 | static int handle_invd(struct kvm_vcpu *vcpu) | ||
3418 | { | ||
3419 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | ||
3420 | } | ||
3421 | |||
3353 | static int handle_invlpg(struct kvm_vcpu *vcpu) | 3422 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3354 | { | 3423 | { |
3355 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3424 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -3378,7 +3447,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) | |||
3378 | 3447 | ||
3379 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 3448 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3380 | { | 3449 | { |
3381 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 3450 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
3382 | } | 3451 | } |
3383 | 3452 | ||
3384 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 3453 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
@@ -3477,7 +3546,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
3477 | 3546 | ||
3478 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 3547 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
3479 | trace_kvm_page_fault(gpa, exit_qualification); | 3548 | trace_kvm_page_fault(gpa, exit_qualification); |
3480 | return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); | 3549 | return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0); |
3481 | } | 3550 | } |
3482 | 3551 | ||
3483 | static u64 ept_rsvd_mask(u64 spte, int level) | 3552 | static u64 ept_rsvd_mask(u64 spte, int level) |
@@ -3593,7 +3662,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3593 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) | 3662 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) |
3594 | return handle_interrupt_window(&vmx->vcpu); | 3663 | return handle_interrupt_window(&vmx->vcpu); |
3595 | 3664 | ||
3596 | err = emulate_instruction(vcpu, 0, 0, 0); | 3665 | err = emulate_instruction(vcpu, 0); |
3597 | 3666 | ||
3598 | if (err == EMULATE_DO_MMIO) { | 3667 | if (err == EMULATE_DO_MMIO) { |
3599 | ret = 0; | 3668 | ret = 0; |
@@ -3650,6 +3719,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3650 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, | 3719 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, |
3651 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, | 3720 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, |
3652 | [EXIT_REASON_HLT] = handle_halt, | 3721 | [EXIT_REASON_HLT] = handle_halt, |
3722 | [EXIT_REASON_INVD] = handle_invd, | ||
3653 | [EXIT_REASON_INVLPG] = handle_invlpg, | 3723 | [EXIT_REASON_INVLPG] = handle_invlpg, |
3654 | [EXIT_REASON_VMCALL] = handle_vmcall, | 3724 | [EXIT_REASON_VMCALL] = handle_vmcall, |
3655 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, | 3725 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, |
@@ -3677,6 +3747,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3677 | static const int kvm_vmx_max_exit_handlers = | 3747 | static const int kvm_vmx_max_exit_handlers = |
3678 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 3748 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
3679 | 3749 | ||
3750 | static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | ||
3751 | { | ||
3752 | *info1 = vmcs_readl(EXIT_QUALIFICATION); | ||
3753 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3754 | } | ||
3755 | |||
3680 | /* | 3756 | /* |
3681 | * The guest has exited. See if we can fix it or if we need userspace | 3757 | * The guest has exited. See if we can fix it or if we need userspace |
3682 | * assistance. | 3758 | * assistance. |
@@ -3687,17 +3763,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3687 | u32 exit_reason = vmx->exit_reason; | 3763 | u32 exit_reason = vmx->exit_reason; |
3688 | u32 vectoring_info = vmx->idt_vectoring_info; | 3764 | u32 vectoring_info = vmx->idt_vectoring_info; |
3689 | 3765 | ||
3690 | trace_kvm_exit(exit_reason, vcpu); | 3766 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
3691 | 3767 | ||
3692 | /* If guest state is invalid, start emulating */ | 3768 | /* If guest state is invalid, start emulating */ |
3693 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3769 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3694 | return handle_invalid_guest_state(vcpu); | 3770 | return handle_invalid_guest_state(vcpu); |
3695 | 3771 | ||
3696 | /* Access CR3 don't cause VMExit in paging mode, so we need | ||
3697 | * to sync with guest real CR3. */ | ||
3698 | if (enable_ept && is_paging(vcpu)) | ||
3699 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
3700 | |||
3701 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | 3772 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { |
3702 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3773 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3703 | vcpu->run->fail_entry.hardware_entry_failure_reason | 3774 | vcpu->run->fail_entry.hardware_entry_failure_reason |
@@ -4014,7 +4085,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4014 | ); | 4085 | ); |
4015 | 4086 | ||
4016 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | 4087 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) |
4017 | | (1 << VCPU_EXREG_PDPTR)); | 4088 | | (1 << VCPU_EXREG_PDPTR) |
4089 | | (1 << VCPU_EXREG_CR3)); | ||
4018 | vcpu->arch.regs_dirty = 0; | 4090 | vcpu->arch.regs_dirty = 0; |
4019 | 4091 | ||
4020 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 4092 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
@@ -4228,11 +4300,6 @@ static int vmx_get_lpage_level(void) | |||
4228 | return PT_PDPE_LEVEL; | 4300 | return PT_PDPE_LEVEL; |
4229 | } | 4301 | } |
4230 | 4302 | ||
4231 | static inline u32 bit(int bitno) | ||
4232 | { | ||
4233 | return 1 << (bitno & 31); | ||
4234 | } | ||
4235 | |||
4236 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 4303 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
4237 | { | 4304 | { |
4238 | struct kvm_cpuid_entry2 *best; | 4305 | struct kvm_cpuid_entry2 *best; |
@@ -4286,6 +4353,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4286 | .get_cpl = vmx_get_cpl, | 4353 | .get_cpl = vmx_get_cpl, |
4287 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4354 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4288 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | 4355 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, |
4356 | .decache_cr3 = vmx_decache_cr3, | ||
4289 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4357 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
4290 | .set_cr0 = vmx_set_cr0, | 4358 | .set_cr0 = vmx_set_cr0, |
4291 | .set_cr3 = vmx_set_cr3, | 4359 | .set_cr3 = vmx_set_cr3, |
@@ -4326,7 +4394,9 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4326 | .get_tdp_level = get_ept_level, | 4394 | .get_tdp_level = get_ept_level, |
4327 | .get_mt_mask = vmx_get_mt_mask, | 4395 | .get_mt_mask = vmx_get_mt_mask, |
4328 | 4396 | ||
4397 | .get_exit_info = vmx_get_exit_info, | ||
4329 | .exit_reasons_str = vmx_exit_reasons_str, | 4398 | .exit_reasons_str = vmx_exit_reasons_str, |
4399 | |||
4330 | .get_lpage_level = vmx_get_lpage_level, | 4400 | .get_lpage_level = vmx_get_lpage_level, |
4331 | 4401 | ||
4332 | .cpuid_update = vmx_cpuid_update, | 4402 | .cpuid_update = vmx_cpuid_update, |
@@ -4402,8 +4472,6 @@ static int __init vmx_init(void) | |||
4402 | 4472 | ||
4403 | if (enable_ept) { | 4473 | if (enable_ept) { |
4404 | bypass_guest_pf = 0; | 4474 | bypass_guest_pf = 0; |
4405 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
4406 | VMX_EPT_WRITABLE_MASK); | ||
4407 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 4475 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
4408 | VMX_EPT_EXECUTABLE_MASK); | 4476 | VMX_EPT_EXECUTABLE_MASK); |
4409 | kvm_enable_tdp(); | 4477 | kvm_enable_tdp(); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cdac9e592aa5..bcc0efce85bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
45 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
46 | #include <linux/hash.h> | ||
46 | #include <trace/events/kvm.h> | 47 | #include <trace/events/kvm.h> |
47 | 48 | ||
48 | #define CREATE_TRACE_POINTS | 49 | #define CREATE_TRACE_POINTS |
@@ -155,9 +156,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
155 | 156 | ||
156 | u64 __read_mostly host_xcr0; | 157 | u64 __read_mostly host_xcr0; |
157 | 158 | ||
158 | static inline u32 bit(int bitno) | 159 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) |
159 | { | 160 | { |
160 | return 1 << (bitno & 31); | 161 | int i; |
162 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) | ||
163 | vcpu->arch.apf.gfns[i] = ~0; | ||
161 | } | 164 | } |
162 | 165 | ||
163 | static void kvm_on_user_return(struct user_return_notifier *urn) | 166 | static void kvm_on_user_return(struct user_return_notifier *urn) |
@@ -331,23 +334,28 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | |||
331 | } | 334 | } |
332 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | 335 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); |
333 | 336 | ||
334 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu) | 337 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) |
335 | { | 338 | { |
336 | unsigned error_code = vcpu->arch.fault.error_code; | 339 | if (err) |
340 | kvm_inject_gp(vcpu, 0); | ||
341 | else | ||
342 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); | ||
337 | 345 | ||
346 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | ||
347 | { | ||
338 | ++vcpu->stat.pf_guest; | 348 | ++vcpu->stat.pf_guest; |
339 | vcpu->arch.cr2 = vcpu->arch.fault.address; | 349 | vcpu->arch.cr2 = fault->address; |
340 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 350 | kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); |
341 | } | 351 | } |
342 | 352 | ||
343 | void kvm_propagate_fault(struct kvm_vcpu *vcpu) | 353 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) |
344 | { | 354 | { |
345 | if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested) | 355 | if (mmu_is_nested(vcpu) && !fault->nested_page_fault) |
346 | vcpu->arch.nested_mmu.inject_page_fault(vcpu); | 356 | vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); |
347 | else | 357 | else |
348 | vcpu->arch.mmu.inject_page_fault(vcpu); | 358 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); |
349 | |||
350 | vcpu->arch.fault.nested = false; | ||
351 | } | 359 | } |
352 | 360 | ||
353 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -465,8 +473,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
465 | (unsigned long *)&vcpu->arch.regs_avail)) | 473 | (unsigned long *)&vcpu->arch.regs_avail)) |
466 | return true; | 474 | return true; |
467 | 475 | ||
468 | gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT; | 476 | gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT; |
469 | offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1); | 477 | offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1); |
470 | r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), | 478 | r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), |
471 | PFERR_USER_MASK | PFERR_WRITE_MASK); | 479 | PFERR_USER_MASK | PFERR_WRITE_MASK); |
472 | if (r < 0) | 480 | if (r < 0) |
@@ -511,12 +519,15 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
511 | } else | 519 | } else |
512 | #endif | 520 | #endif |
513 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, | 521 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, |
514 | vcpu->arch.cr3)) | 522 | kvm_read_cr3(vcpu))) |
515 | return 1; | 523 | return 1; |
516 | } | 524 | } |
517 | 525 | ||
518 | kvm_x86_ops->set_cr0(vcpu, cr0); | 526 | kvm_x86_ops->set_cr0(vcpu, cr0); |
519 | 527 | ||
528 | if ((cr0 ^ old_cr0) & X86_CR0_PG) | ||
529 | kvm_clear_async_pf_completion_queue(vcpu); | ||
530 | |||
520 | if ((cr0 ^ old_cr0) & update_bits) | 531 | if ((cr0 ^ old_cr0) & update_bits) |
521 | kvm_mmu_reset_context(vcpu); | 532 | kvm_mmu_reset_context(vcpu); |
522 | return 0; | 533 | return 0; |
@@ -600,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
600 | return 1; | 611 | return 1; |
601 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 612 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
602 | && ((cr4 ^ old_cr4) & pdptr_bits) | 613 | && ((cr4 ^ old_cr4) & pdptr_bits) |
603 | && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)) | 614 | && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, |
615 | kvm_read_cr3(vcpu))) | ||
604 | return 1; | 616 | return 1; |
605 | 617 | ||
606 | if (cr4 & X86_CR4_VMXE) | 618 | if (cr4 & X86_CR4_VMXE) |
@@ -620,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); | |||
620 | 632 | ||
621 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 633 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
622 | { | 634 | { |
623 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 635 | if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { |
624 | kvm_mmu_sync_roots(vcpu); | 636 | kvm_mmu_sync_roots(vcpu); |
625 | kvm_mmu_flush_tlb(vcpu); | 637 | kvm_mmu_flush_tlb(vcpu); |
626 | return 0; | 638 | return 0; |
@@ -655,12 +667,13 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
655 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 667 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
656 | return 1; | 668 | return 1; |
657 | vcpu->arch.cr3 = cr3; | 669 | vcpu->arch.cr3 = cr3; |
670 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
658 | vcpu->arch.mmu.new_cr3(vcpu); | 671 | vcpu->arch.mmu.new_cr3(vcpu); |
659 | return 0; | 672 | return 0; |
660 | } | 673 | } |
661 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 674 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
662 | 675 | ||
663 | int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 676 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
664 | { | 677 | { |
665 | if (cr8 & CR8_RESERVED_BITS) | 678 | if (cr8 & CR8_RESERVED_BITS) |
666 | return 1; | 679 | return 1; |
@@ -670,12 +683,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
670 | vcpu->arch.cr8 = cr8; | 683 | vcpu->arch.cr8 = cr8; |
671 | return 0; | 684 | return 0; |
672 | } | 685 | } |
673 | |||
674 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
675 | { | ||
676 | if (__kvm_set_cr8(vcpu, cr8)) | ||
677 | kvm_inject_gp(vcpu, 0); | ||
678 | } | ||
679 | EXPORT_SYMBOL_GPL(kvm_set_cr8); | 686 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
680 | 687 | ||
681 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | 688 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) |
@@ -780,12 +787,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); | |||
780 | * kvm-specific. Those are put in the beginning of the list. | 787 | * kvm-specific. Those are put in the beginning of the list. |
781 | */ | 788 | */ |
782 | 789 | ||
783 | #define KVM_SAVE_MSRS_BEGIN 7 | 790 | #define KVM_SAVE_MSRS_BEGIN 8 |
784 | static u32 msrs_to_save[] = { | 791 | static u32 msrs_to_save[] = { |
785 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 792 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
786 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 793 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
787 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 794 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
788 | HV_X64_MSR_APIC_ASSIST_PAGE, | 795 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, |
789 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 796 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
790 | MSR_STAR, | 797 | MSR_STAR, |
791 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
@@ -835,7 +842,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
835 | kvm_x86_ops->set_efer(vcpu, efer); | 842 | kvm_x86_ops->set_efer(vcpu, efer); |
836 | 843 | ||
837 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 844 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
838 | kvm_mmu_reset_context(vcpu); | ||
839 | 845 | ||
840 | /* Update reserved bits */ | 846 | /* Update reserved bits */ |
841 | if ((efer ^ old_efer) & EFER_NX) | 847 | if ((efer ^ old_efer) & EFER_NX) |
@@ -981,7 +987,7 @@ static inline u64 nsec_to_cycles(u64 nsec) | |||
981 | if (kvm_tsc_changes_freq()) | 987 | if (kvm_tsc_changes_freq()) |
982 | printk_once(KERN_WARNING | 988 | printk_once(KERN_WARNING |
983 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | 989 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); |
984 | ret = nsec * __get_cpu_var(cpu_tsc_khz); | 990 | ret = nsec * __this_cpu_read(cpu_tsc_khz); |
985 | do_div(ret, USEC_PER_SEC); | 991 | do_div(ret, USEC_PER_SEC); |
986 | return ret; | 992 | return ret; |
987 | } | 993 | } |
@@ -1066,7 +1072,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1066 | local_irq_save(flags); | 1072 | local_irq_save(flags); |
1067 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1073 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); |
1068 | kernel_ns = get_kernel_ns(); | 1074 | kernel_ns = get_kernel_ns(); |
1069 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); | 1075 | this_tsc_khz = __this_cpu_read(cpu_tsc_khz); |
1070 | 1076 | ||
1071 | if (unlikely(this_tsc_khz == 0)) { | 1077 | if (unlikely(this_tsc_khz == 0)) { |
1072 | local_irq_restore(flags); | 1078 | local_irq_restore(flags); |
@@ -1423,6 +1429,30 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1423 | return 0; | 1429 | return 0; |
1424 | } | 1430 | } |
1425 | 1431 | ||
1432 | static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | ||
1433 | { | ||
1434 | gpa_t gpa = data & ~0x3f; | ||
1435 | |||
1436 | /* Bits 2:5 are resrved, Should be zero */ | ||
1437 | if (data & 0x3c) | ||
1438 | return 1; | ||
1439 | |||
1440 | vcpu->arch.apf.msr_val = data; | ||
1441 | |||
1442 | if (!(data & KVM_ASYNC_PF_ENABLED)) { | ||
1443 | kvm_clear_async_pf_completion_queue(vcpu); | ||
1444 | kvm_async_pf_hash_reset(vcpu); | ||
1445 | return 0; | ||
1446 | } | ||
1447 | |||
1448 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) | ||
1449 | return 1; | ||
1450 | |||
1451 | vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); | ||
1452 | kvm_async_pf_wakeup_all(vcpu); | ||
1453 | return 0; | ||
1454 | } | ||
1455 | |||
1426 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1456 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1427 | { | 1457 | { |
1428 | switch (msr) { | 1458 | switch (msr) { |
@@ -1504,6 +1534,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1504 | } | 1534 | } |
1505 | break; | 1535 | break; |
1506 | } | 1536 | } |
1537 | case MSR_KVM_ASYNC_PF_EN: | ||
1538 | if (kvm_pv_enable_async_pf(vcpu, data)) | ||
1539 | return 1; | ||
1540 | break; | ||
1507 | case MSR_IA32_MCG_CTL: | 1541 | case MSR_IA32_MCG_CTL: |
1508 | case MSR_IA32_MCG_STATUS: | 1542 | case MSR_IA32_MCG_STATUS: |
1509 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1543 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
@@ -1780,6 +1814,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1780 | case MSR_KVM_SYSTEM_TIME_NEW: | 1814 | case MSR_KVM_SYSTEM_TIME_NEW: |
1781 | data = vcpu->arch.time; | 1815 | data = vcpu->arch.time; |
1782 | break; | 1816 | break; |
1817 | case MSR_KVM_ASYNC_PF_EN: | ||
1818 | data = vcpu->arch.apf.msr_val; | ||
1819 | break; | ||
1783 | case MSR_IA32_P5_MC_ADDR: | 1820 | case MSR_IA32_P5_MC_ADDR: |
1784 | case MSR_IA32_P5_MC_TYPE: | 1821 | case MSR_IA32_P5_MC_TYPE: |
1785 | case MSR_IA32_MCG_CAP: | 1822 | case MSR_IA32_MCG_CAP: |
@@ -1909,6 +1946,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1909 | case KVM_CAP_NOP_IO_DELAY: | 1946 | case KVM_CAP_NOP_IO_DELAY: |
1910 | case KVM_CAP_MP_STATE: | 1947 | case KVM_CAP_MP_STATE: |
1911 | case KVM_CAP_SYNC_MMU: | 1948 | case KVM_CAP_SYNC_MMU: |
1949 | case KVM_CAP_USER_NMI: | ||
1912 | case KVM_CAP_REINJECT_CONTROL: | 1950 | case KVM_CAP_REINJECT_CONTROL: |
1913 | case KVM_CAP_IRQ_INJECT_STATUS: | 1951 | case KVM_CAP_IRQ_INJECT_STATUS: |
1914 | case KVM_CAP_ASSIGN_DEV_IRQ: | 1952 | case KVM_CAP_ASSIGN_DEV_IRQ: |
@@ -1927,6 +1965,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1927 | case KVM_CAP_DEBUGREGS: | 1965 | case KVM_CAP_DEBUGREGS: |
1928 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1966 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1929 | case KVM_CAP_XSAVE: | 1967 | case KVM_CAP_XSAVE: |
1968 | case KVM_CAP_ASYNC_PF: | ||
1930 | r = 1; | 1969 | r = 1; |
1931 | break; | 1970 | break; |
1932 | case KVM_CAP_COALESCED_MMIO: | 1971 | case KVM_CAP_COALESCED_MMIO: |
@@ -2190,6 +2229,11 @@ out: | |||
2190 | return r; | 2229 | return r; |
2191 | } | 2230 | } |
2192 | 2231 | ||
2232 | static void cpuid_mask(u32 *word, int wordnum) | ||
2233 | { | ||
2234 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
2235 | } | ||
2236 | |||
2193 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 2237 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2194 | u32 index) | 2238 | u32 index) |
2195 | { | 2239 | { |
@@ -2264,7 +2308,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2264 | break; | 2308 | break; |
2265 | case 1: | 2309 | case 1: |
2266 | entry->edx &= kvm_supported_word0_x86_features; | 2310 | entry->edx &= kvm_supported_word0_x86_features; |
2311 | cpuid_mask(&entry->edx, 0); | ||
2267 | entry->ecx &= kvm_supported_word4_x86_features; | 2312 | entry->ecx &= kvm_supported_word4_x86_features; |
2313 | cpuid_mask(&entry->ecx, 4); | ||
2268 | /* we support x2apic emulation even if host does not support | 2314 | /* we support x2apic emulation even if host does not support |
2269 | * it since we emulate x2apic in software */ | 2315 | * it since we emulate x2apic in software */ |
2270 | entry->ecx |= F(X2APIC); | 2316 | entry->ecx |= F(X2APIC); |
@@ -2355,7 +2401,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2355 | break; | 2401 | break; |
2356 | case 0x80000001: | 2402 | case 0x80000001: |
2357 | entry->edx &= kvm_supported_word1_x86_features; | 2403 | entry->edx &= kvm_supported_word1_x86_features; |
2404 | cpuid_mask(&entry->edx, 1); | ||
2358 | entry->ecx &= kvm_supported_word6_x86_features; | 2405 | entry->ecx &= kvm_supported_word6_x86_features; |
2406 | cpuid_mask(&entry->ecx, 6); | ||
2359 | break; | 2407 | break; |
2360 | } | 2408 | } |
2361 | 2409 | ||
@@ -3174,20 +3222,18 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3174 | struct kvm_memslots *slots, *old_slots; | 3222 | struct kvm_memslots *slots, *old_slots; |
3175 | unsigned long *dirty_bitmap; | 3223 | unsigned long *dirty_bitmap; |
3176 | 3224 | ||
3177 | r = -ENOMEM; | 3225 | dirty_bitmap = memslot->dirty_bitmap_head; |
3178 | dirty_bitmap = vmalloc(n); | 3226 | if (memslot->dirty_bitmap == dirty_bitmap) |
3179 | if (!dirty_bitmap) | 3227 | dirty_bitmap += n / sizeof(long); |
3180 | goto out; | ||
3181 | memset(dirty_bitmap, 0, n); | 3228 | memset(dirty_bitmap, 0, n); |
3182 | 3229 | ||
3183 | r = -ENOMEM; | 3230 | r = -ENOMEM; |
3184 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 3231 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
3185 | if (!slots) { | 3232 | if (!slots) |
3186 | vfree(dirty_bitmap); | ||
3187 | goto out; | 3233 | goto out; |
3188 | } | ||
3189 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3234 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
3190 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3235 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; |
3236 | slots->generation++; | ||
3191 | 3237 | ||
3192 | old_slots = kvm->memslots; | 3238 | old_slots = kvm->memslots; |
3193 | rcu_assign_pointer(kvm->memslots, slots); | 3239 | rcu_assign_pointer(kvm->memslots, slots); |
@@ -3200,11 +3246,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3200 | spin_unlock(&kvm->mmu_lock); | 3246 | spin_unlock(&kvm->mmu_lock); |
3201 | 3247 | ||
3202 | r = -EFAULT; | 3248 | r = -EFAULT; |
3203 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { | 3249 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |
3204 | vfree(dirty_bitmap); | ||
3205 | goto out; | 3250 | goto out; |
3206 | } | ||
3207 | vfree(dirty_bitmap); | ||
3208 | } else { | 3251 | } else { |
3209 | r = -EFAULT; | 3252 | r = -EFAULT; |
3210 | if (clear_user(log->dirty_bitmap, n)) | 3253 | if (clear_user(log->dirty_bitmap, n)) |
@@ -3271,8 +3314,10 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3271 | if (vpic) { | 3314 | if (vpic) { |
3272 | r = kvm_ioapic_init(kvm); | 3315 | r = kvm_ioapic_init(kvm); |
3273 | if (r) { | 3316 | if (r) { |
3317 | mutex_lock(&kvm->slots_lock); | ||
3274 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | 3318 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, |
3275 | &vpic->dev); | 3319 | &vpic->dev); |
3320 | mutex_unlock(&kvm->slots_lock); | ||
3276 | kfree(vpic); | 3321 | kfree(vpic); |
3277 | goto create_irqchip_unlock; | 3322 | goto create_irqchip_unlock; |
3278 | } | 3323 | } |
@@ -3283,10 +3328,12 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3283 | smp_wmb(); | 3328 | smp_wmb(); |
3284 | r = kvm_setup_default_irq_routing(kvm); | 3329 | r = kvm_setup_default_irq_routing(kvm); |
3285 | if (r) { | 3330 | if (r) { |
3331 | mutex_lock(&kvm->slots_lock); | ||
3286 | mutex_lock(&kvm->irq_lock); | 3332 | mutex_lock(&kvm->irq_lock); |
3287 | kvm_ioapic_destroy(kvm); | 3333 | kvm_ioapic_destroy(kvm); |
3288 | kvm_destroy_pic(kvm); | 3334 | kvm_destroy_pic(kvm); |
3289 | mutex_unlock(&kvm->irq_lock); | 3335 | mutex_unlock(&kvm->irq_lock); |
3336 | mutex_unlock(&kvm->slots_lock); | ||
3290 | } | 3337 | } |
3291 | create_irqchip_unlock: | 3338 | create_irqchip_unlock: |
3292 | mutex_unlock(&kvm->lock); | 3339 | mutex_unlock(&kvm->lock); |
@@ -3562,63 +3609,63 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | |||
3562 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 3609 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) |
3563 | { | 3610 | { |
3564 | gpa_t t_gpa; | 3611 | gpa_t t_gpa; |
3565 | u32 error; | 3612 | struct x86_exception exception; |
3566 | 3613 | ||
3567 | BUG_ON(!mmu_is_nested(vcpu)); | 3614 | BUG_ON(!mmu_is_nested(vcpu)); |
3568 | 3615 | ||
3569 | /* NPT walks are always user-walks */ | 3616 | /* NPT walks are always user-walks */ |
3570 | access |= PFERR_USER_MASK; | 3617 | access |= PFERR_USER_MASK; |
3571 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error); | 3618 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); |
3572 | if (t_gpa == UNMAPPED_GVA) | ||
3573 | vcpu->arch.fault.nested = true; | ||
3574 | 3619 | ||
3575 | return t_gpa; | 3620 | return t_gpa; |
3576 | } | 3621 | } |
3577 | 3622 | ||
3578 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3623 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
3624 | struct x86_exception *exception) | ||
3579 | { | 3625 | { |
3580 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3626 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3581 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); | 3627 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3582 | } | 3628 | } |
3583 | 3629 | ||
3584 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3630 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
3631 | struct x86_exception *exception) | ||
3585 | { | 3632 | { |
3586 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3633 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3587 | access |= PFERR_FETCH_MASK; | 3634 | access |= PFERR_FETCH_MASK; |
3588 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); | 3635 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3589 | } | 3636 | } |
3590 | 3637 | ||
3591 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3638 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, |
3639 | struct x86_exception *exception) | ||
3592 | { | 3640 | { |
3593 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3641 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3594 | access |= PFERR_WRITE_MASK; | 3642 | access |= PFERR_WRITE_MASK; |
3595 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); | 3643 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3596 | } | 3644 | } |
3597 | 3645 | ||
3598 | /* uses this to access any guest's mapped memory without checking CPL */ | 3646 | /* uses this to access any guest's mapped memory without checking CPL */ |
3599 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3647 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, |
3648 | struct x86_exception *exception) | ||
3600 | { | 3649 | { |
3601 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error); | 3650 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception); |
3602 | } | 3651 | } |
3603 | 3652 | ||
3604 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | 3653 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, |
3605 | struct kvm_vcpu *vcpu, u32 access, | 3654 | struct kvm_vcpu *vcpu, u32 access, |
3606 | u32 *error) | 3655 | struct x86_exception *exception) |
3607 | { | 3656 | { |
3608 | void *data = val; | 3657 | void *data = val; |
3609 | int r = X86EMUL_CONTINUE; | 3658 | int r = X86EMUL_CONTINUE; |
3610 | 3659 | ||
3611 | while (bytes) { | 3660 | while (bytes) { |
3612 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, | 3661 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, |
3613 | error); | 3662 | exception); |
3614 | unsigned offset = addr & (PAGE_SIZE-1); | 3663 | unsigned offset = addr & (PAGE_SIZE-1); |
3615 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3664 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
3616 | int ret; | 3665 | int ret; |
3617 | 3666 | ||
3618 | if (gpa == UNMAPPED_GVA) { | 3667 | if (gpa == UNMAPPED_GVA) |
3619 | r = X86EMUL_PROPAGATE_FAULT; | 3668 | return X86EMUL_PROPAGATE_FAULT; |
3620 | goto out; | ||
3621 | } | ||
3622 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 3669 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); |
3623 | if (ret < 0) { | 3670 | if (ret < 0) { |
3624 | r = X86EMUL_IO_NEEDED; | 3671 | r = X86EMUL_IO_NEEDED; |
@@ -3635,31 +3682,35 @@ out: | |||
3635 | 3682 | ||
3636 | /* used for instruction fetching */ | 3683 | /* used for instruction fetching */ |
3637 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3684 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, |
3638 | struct kvm_vcpu *vcpu, u32 *error) | 3685 | struct kvm_vcpu *vcpu, |
3686 | struct x86_exception *exception) | ||
3639 | { | 3687 | { |
3640 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3688 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3641 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | 3689 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, |
3642 | access | PFERR_FETCH_MASK, error); | 3690 | access | PFERR_FETCH_MASK, |
3691 | exception); | ||
3643 | } | 3692 | } |
3644 | 3693 | ||
3645 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3694 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, |
3646 | struct kvm_vcpu *vcpu, u32 *error) | 3695 | struct kvm_vcpu *vcpu, |
3696 | struct x86_exception *exception) | ||
3647 | { | 3697 | { |
3648 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3698 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3649 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | 3699 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, |
3650 | error); | 3700 | exception); |
3651 | } | 3701 | } |
3652 | 3702 | ||
3653 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | 3703 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, |
3654 | struct kvm_vcpu *vcpu, u32 *error) | 3704 | struct kvm_vcpu *vcpu, |
3705 | struct x86_exception *exception) | ||
3655 | { | 3706 | { |
3656 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3707 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
3657 | } | 3708 | } |
3658 | 3709 | ||
3659 | static int kvm_write_guest_virt_system(gva_t addr, void *val, | 3710 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
3660 | unsigned int bytes, | 3711 | unsigned int bytes, |
3661 | struct kvm_vcpu *vcpu, | 3712 | struct kvm_vcpu *vcpu, |
3662 | u32 *error) | 3713 | struct x86_exception *exception) |
3663 | { | 3714 | { |
3664 | void *data = val; | 3715 | void *data = val; |
3665 | int r = X86EMUL_CONTINUE; | 3716 | int r = X86EMUL_CONTINUE; |
@@ -3667,15 +3718,13 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, | |||
3667 | while (bytes) { | 3718 | while (bytes) { |
3668 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, | 3719 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, |
3669 | PFERR_WRITE_MASK, | 3720 | PFERR_WRITE_MASK, |
3670 | error); | 3721 | exception); |
3671 | unsigned offset = addr & (PAGE_SIZE-1); | 3722 | unsigned offset = addr & (PAGE_SIZE-1); |
3672 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3723 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3673 | int ret; | 3724 | int ret; |
3674 | 3725 | ||
3675 | if (gpa == UNMAPPED_GVA) { | 3726 | if (gpa == UNMAPPED_GVA) |
3676 | r = X86EMUL_PROPAGATE_FAULT; | 3727 | return X86EMUL_PROPAGATE_FAULT; |
3677 | goto out; | ||
3678 | } | ||
3679 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 3728 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); |
3680 | if (ret < 0) { | 3729 | if (ret < 0) { |
3681 | r = X86EMUL_IO_NEEDED; | 3730 | r = X86EMUL_IO_NEEDED; |
@@ -3693,7 +3742,7 @@ out: | |||
3693 | static int emulator_read_emulated(unsigned long addr, | 3742 | static int emulator_read_emulated(unsigned long addr, |
3694 | void *val, | 3743 | void *val, |
3695 | unsigned int bytes, | 3744 | unsigned int bytes, |
3696 | unsigned int *error_code, | 3745 | struct x86_exception *exception, |
3697 | struct kvm_vcpu *vcpu) | 3746 | struct kvm_vcpu *vcpu) |
3698 | { | 3747 | { |
3699 | gpa_t gpa; | 3748 | gpa_t gpa; |
@@ -3706,7 +3755,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
3706 | return X86EMUL_CONTINUE; | 3755 | return X86EMUL_CONTINUE; |
3707 | } | 3756 | } |
3708 | 3757 | ||
3709 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); | 3758 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception); |
3710 | 3759 | ||
3711 | if (gpa == UNMAPPED_GVA) | 3760 | if (gpa == UNMAPPED_GVA) |
3712 | return X86EMUL_PROPAGATE_FAULT; | 3761 | return X86EMUL_PROPAGATE_FAULT; |
@@ -3715,8 +3764,8 @@ static int emulator_read_emulated(unsigned long addr, | |||
3715 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3764 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3716 | goto mmio; | 3765 | goto mmio; |
3717 | 3766 | ||
3718 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) | 3767 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) |
3719 | == X86EMUL_CONTINUE) | 3768 | == X86EMUL_CONTINUE) |
3720 | return X86EMUL_CONTINUE; | 3769 | return X86EMUL_CONTINUE; |
3721 | 3770 | ||
3722 | mmio: | 3771 | mmio: |
@@ -3740,7 +3789,7 @@ mmio: | |||
3740 | } | 3789 | } |
3741 | 3790 | ||
3742 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 3791 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
3743 | const void *val, int bytes) | 3792 | const void *val, int bytes) |
3744 | { | 3793 | { |
3745 | int ret; | 3794 | int ret; |
3746 | 3795 | ||
@@ -3754,12 +3803,12 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3754 | static int emulator_write_emulated_onepage(unsigned long addr, | 3803 | static int emulator_write_emulated_onepage(unsigned long addr, |
3755 | const void *val, | 3804 | const void *val, |
3756 | unsigned int bytes, | 3805 | unsigned int bytes, |
3757 | unsigned int *error_code, | 3806 | struct x86_exception *exception, |
3758 | struct kvm_vcpu *vcpu) | 3807 | struct kvm_vcpu *vcpu) |
3759 | { | 3808 | { |
3760 | gpa_t gpa; | 3809 | gpa_t gpa; |
3761 | 3810 | ||
3762 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); | 3811 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); |
3763 | 3812 | ||
3764 | if (gpa == UNMAPPED_GVA) | 3813 | if (gpa == UNMAPPED_GVA) |
3765 | return X86EMUL_PROPAGATE_FAULT; | 3814 | return X86EMUL_PROPAGATE_FAULT; |
@@ -3792,7 +3841,7 @@ mmio: | |||
3792 | int emulator_write_emulated(unsigned long addr, | 3841 | int emulator_write_emulated(unsigned long addr, |
3793 | const void *val, | 3842 | const void *val, |
3794 | unsigned int bytes, | 3843 | unsigned int bytes, |
3795 | unsigned int *error_code, | 3844 | struct x86_exception *exception, |
3796 | struct kvm_vcpu *vcpu) | 3845 | struct kvm_vcpu *vcpu) |
3797 | { | 3846 | { |
3798 | /* Crossing a page boundary? */ | 3847 | /* Crossing a page boundary? */ |
@@ -3800,7 +3849,7 @@ int emulator_write_emulated(unsigned long addr, | |||
3800 | int rc, now; | 3849 | int rc, now; |
3801 | 3850 | ||
3802 | now = -addr & ~PAGE_MASK; | 3851 | now = -addr & ~PAGE_MASK; |
3803 | rc = emulator_write_emulated_onepage(addr, val, now, error_code, | 3852 | rc = emulator_write_emulated_onepage(addr, val, now, exception, |
3804 | vcpu); | 3853 | vcpu); |
3805 | if (rc != X86EMUL_CONTINUE) | 3854 | if (rc != X86EMUL_CONTINUE) |
3806 | return rc; | 3855 | return rc; |
@@ -3808,7 +3857,7 @@ int emulator_write_emulated(unsigned long addr, | |||
3808 | val += now; | 3857 | val += now; |
3809 | bytes -= now; | 3858 | bytes -= now; |
3810 | } | 3859 | } |
3811 | return emulator_write_emulated_onepage(addr, val, bytes, error_code, | 3860 | return emulator_write_emulated_onepage(addr, val, bytes, exception, |
3812 | vcpu); | 3861 | vcpu); |
3813 | } | 3862 | } |
3814 | 3863 | ||
@@ -3826,7 +3875,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3826 | const void *old, | 3875 | const void *old, |
3827 | const void *new, | 3876 | const void *new, |
3828 | unsigned int bytes, | 3877 | unsigned int bytes, |
3829 | unsigned int *error_code, | 3878 | struct x86_exception *exception, |
3830 | struct kvm_vcpu *vcpu) | 3879 | struct kvm_vcpu *vcpu) |
3831 | { | 3880 | { |
3832 | gpa_t gpa; | 3881 | gpa_t gpa; |
@@ -3884,7 +3933,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3884 | emul_write: | 3933 | emul_write: |
3885 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3934 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3886 | 3935 | ||
3887 | return emulator_write_emulated(addr, new, bytes, error_code, vcpu); | 3936 | return emulator_write_emulated(addr, new, bytes, exception, vcpu); |
3888 | } | 3937 | } |
3889 | 3938 | ||
3890 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 3939 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3909,7 +3958,7 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | |||
3909 | if (vcpu->arch.pio.count) | 3958 | if (vcpu->arch.pio.count) |
3910 | goto data_avail; | 3959 | goto data_avail; |
3911 | 3960 | ||
3912 | trace_kvm_pio(0, port, size, 1); | 3961 | trace_kvm_pio(0, port, size, count); |
3913 | 3962 | ||
3914 | vcpu->arch.pio.port = port; | 3963 | vcpu->arch.pio.port = port; |
3915 | vcpu->arch.pio.in = 1; | 3964 | vcpu->arch.pio.in = 1; |
@@ -3937,7 +3986,7 @@ static int emulator_pio_out_emulated(int size, unsigned short port, | |||
3937 | const void *val, unsigned int count, | 3986 | const void *val, unsigned int count, |
3938 | struct kvm_vcpu *vcpu) | 3987 | struct kvm_vcpu *vcpu) |
3939 | { | 3988 | { |
3940 | trace_kvm_pio(1, port, size, 1); | 3989 | trace_kvm_pio(1, port, size, count); |
3941 | 3990 | ||
3942 | vcpu->arch.pio.port = port; | 3991 | vcpu->arch.pio.port = port; |
3943 | vcpu->arch.pio.in = 0; | 3992 | vcpu->arch.pio.in = 0; |
@@ -3978,13 +4027,15 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | |||
3978 | return X86EMUL_CONTINUE; | 4027 | return X86EMUL_CONTINUE; |
3979 | 4028 | ||
3980 | if (kvm_x86_ops->has_wbinvd_exit()) { | 4029 | if (kvm_x86_ops->has_wbinvd_exit()) { |
3981 | preempt_disable(); | 4030 | int cpu = get_cpu(); |
4031 | |||
4032 | cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); | ||
3982 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, | 4033 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, |
3983 | wbinvd_ipi, NULL, 1); | 4034 | wbinvd_ipi, NULL, 1); |
3984 | preempt_enable(); | 4035 | put_cpu(); |
3985 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); | 4036 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); |
3986 | } | 4037 | } else |
3987 | wbinvd(); | 4038 | wbinvd(); |
3988 | return X86EMUL_CONTINUE; | 4039 | return X86EMUL_CONTINUE; |
3989 | } | 4040 | } |
3990 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | 4041 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); |
@@ -4024,7 +4075,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
4024 | value = vcpu->arch.cr2; | 4075 | value = vcpu->arch.cr2; |
4025 | break; | 4076 | break; |
4026 | case 3: | 4077 | case 3: |
4027 | value = vcpu->arch.cr3; | 4078 | value = kvm_read_cr3(vcpu); |
4028 | break; | 4079 | break; |
4029 | case 4: | 4080 | case 4: |
4030 | value = kvm_read_cr4(vcpu); | 4081 | value = kvm_read_cr4(vcpu); |
@@ -4058,7 +4109,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | |||
4058 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | 4109 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
4059 | break; | 4110 | break; |
4060 | case 8: | 4111 | case 8: |
4061 | res = __kvm_set_cr8(vcpu, val & 0xfUL); | 4112 | res = kvm_set_cr8(vcpu, val); |
4062 | break; | 4113 | break; |
4063 | default: | 4114 | default: |
4064 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4115 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
@@ -4211,12 +4262,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | |||
4211 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | 4262 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) |
4212 | { | 4263 | { |
4213 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4264 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4214 | if (ctxt->exception == PF_VECTOR) | 4265 | if (ctxt->exception.vector == PF_VECTOR) |
4215 | kvm_propagate_fault(vcpu); | 4266 | kvm_propagate_fault(vcpu, &ctxt->exception); |
4216 | else if (ctxt->error_code_valid) | 4267 | else if (ctxt->exception.error_code_valid) |
4217 | kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); | 4268 | kvm_queue_exception_e(vcpu, ctxt->exception.vector, |
4269 | ctxt->exception.error_code); | ||
4218 | else | 4270 | else |
4219 | kvm_queue_exception(vcpu, ctxt->exception); | 4271 | kvm_queue_exception(vcpu, ctxt->exception.vector); |
4220 | } | 4272 | } |
4221 | 4273 | ||
4222 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | 4274 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) |
@@ -4272,13 +4324,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); | |||
4272 | 4324 | ||
4273 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | 4325 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) |
4274 | { | 4326 | { |
4327 | int r = EMULATE_DONE; | ||
4328 | |||
4275 | ++vcpu->stat.insn_emulation_fail; | 4329 | ++vcpu->stat.insn_emulation_fail; |
4276 | trace_kvm_emulate_insn_failed(vcpu); | 4330 | trace_kvm_emulate_insn_failed(vcpu); |
4277 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 4331 | if (!is_guest_mode(vcpu)) { |
4278 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 4332 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
4279 | vcpu->run->internal.ndata = 0; | 4333 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
4334 | vcpu->run->internal.ndata = 0; | ||
4335 | r = EMULATE_FAIL; | ||
4336 | } | ||
4280 | kvm_queue_exception(vcpu, UD_VECTOR); | 4337 | kvm_queue_exception(vcpu, UD_VECTOR); |
4281 | return EMULATE_FAIL; | 4338 | |
4339 | return r; | ||
4282 | } | 4340 | } |
4283 | 4341 | ||
4284 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | 4342 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -4307,10 +4365,11 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
4307 | return false; | 4365 | return false; |
4308 | } | 4366 | } |
4309 | 4367 | ||
4310 | int emulate_instruction(struct kvm_vcpu *vcpu, | 4368 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4311 | unsigned long cr2, | 4369 | unsigned long cr2, |
4312 | u16 error_code, | 4370 | int emulation_type, |
4313 | int emulation_type) | 4371 | void *insn, |
4372 | int insn_len) | ||
4314 | { | 4373 | { |
4315 | int r; | 4374 | int r; |
4316 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4375 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
@@ -4328,10 +4387,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
4328 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4387 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4329 | init_emulate_ctxt(vcpu); | 4388 | init_emulate_ctxt(vcpu); |
4330 | vcpu->arch.emulate_ctxt.interruptibility = 0; | 4389 | vcpu->arch.emulate_ctxt.interruptibility = 0; |
4331 | vcpu->arch.emulate_ctxt.exception = -1; | 4390 | vcpu->arch.emulate_ctxt.have_exception = false; |
4332 | vcpu->arch.emulate_ctxt.perm_ok = false; | 4391 | vcpu->arch.emulate_ctxt.perm_ok = false; |
4333 | 4392 | ||
4334 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt); | 4393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); |
4335 | if (r == X86EMUL_PROPAGATE_FAULT) | 4394 | if (r == X86EMUL_PROPAGATE_FAULT) |
4336 | goto done; | 4395 | goto done; |
4337 | 4396 | ||
@@ -4394,7 +4453,7 @@ restart: | |||
4394 | } | 4453 | } |
4395 | 4454 | ||
4396 | done: | 4455 | done: |
4397 | if (vcpu->arch.emulate_ctxt.exception >= 0) { | 4456 | if (vcpu->arch.emulate_ctxt.have_exception) { |
4398 | inject_emulated_exception(vcpu); | 4457 | inject_emulated_exception(vcpu); |
4399 | r = EMULATE_DONE; | 4458 | r = EMULATE_DONE; |
4400 | } else if (vcpu->arch.pio.count) { | 4459 | } else if (vcpu->arch.pio.count) { |
@@ -4418,7 +4477,7 @@ done: | |||
4418 | 4477 | ||
4419 | return r; | 4478 | return r; |
4420 | } | 4479 | } |
4421 | EXPORT_SYMBOL_GPL(emulate_instruction); | 4480 | EXPORT_SYMBOL_GPL(x86_emulate_instruction); |
4422 | 4481 | ||
4423 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | 4482 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
4424 | { | 4483 | { |
@@ -4432,7 +4491,7 @@ EXPORT_SYMBOL_GPL(kvm_fast_pio_out); | |||
4432 | 4491 | ||
4433 | static void tsc_bad(void *info) | 4492 | static void tsc_bad(void *info) |
4434 | { | 4493 | { |
4435 | __get_cpu_var(cpu_tsc_khz) = 0; | 4494 | __this_cpu_write(cpu_tsc_khz, 0); |
4436 | } | 4495 | } |
4437 | 4496 | ||
4438 | static void tsc_khz_changed(void *data) | 4497 | static void tsc_khz_changed(void *data) |
@@ -4446,7 +4505,7 @@ static void tsc_khz_changed(void *data) | |||
4446 | khz = cpufreq_quick_get(raw_smp_processor_id()); | 4505 | khz = cpufreq_quick_get(raw_smp_processor_id()); |
4447 | if (!khz) | 4506 | if (!khz) |
4448 | khz = tsc_khz; | 4507 | khz = tsc_khz; |
4449 | __get_cpu_var(cpu_tsc_khz) = khz; | 4508 | __this_cpu_write(cpu_tsc_khz, khz); |
4450 | } | 4509 | } |
4451 | 4510 | ||
4452 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 4511 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
@@ -4569,9 +4628,11 @@ static void kvm_timer_init(void) | |||
4569 | #ifdef CONFIG_CPU_FREQ | 4628 | #ifdef CONFIG_CPU_FREQ |
4570 | struct cpufreq_policy policy; | 4629 | struct cpufreq_policy policy; |
4571 | memset(&policy, 0, sizeof(policy)); | 4630 | memset(&policy, 0, sizeof(policy)); |
4572 | cpufreq_get_policy(&policy, get_cpu()); | 4631 | cpu = get_cpu(); |
4632 | cpufreq_get_policy(&policy, cpu); | ||
4573 | if (policy.cpuinfo.max_freq) | 4633 | if (policy.cpuinfo.max_freq) |
4574 | max_tsc_khz = policy.cpuinfo.max_freq; | 4634 | max_tsc_khz = policy.cpuinfo.max_freq; |
4635 | put_cpu(); | ||
4575 | #endif | 4636 | #endif |
4576 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | 4637 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, |
4577 | CPUFREQ_TRANSITION_NOTIFIER); | 4638 | CPUFREQ_TRANSITION_NOTIFIER); |
@@ -4656,7 +4717,6 @@ int kvm_arch_init(void *opaque) | |||
4656 | 4717 | ||
4657 | kvm_x86_ops = ops; | 4718 | kvm_x86_ops = ops; |
4658 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 4719 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
4659 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | ||
4660 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 4720 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
4661 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 4721 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
4662 | 4722 | ||
@@ -5119,6 +5179,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5119 | vcpu->fpu_active = 0; | 5179 | vcpu->fpu_active = 0; |
5120 | kvm_x86_ops->fpu_deactivate(vcpu); | 5180 | kvm_x86_ops->fpu_deactivate(vcpu); |
5121 | } | 5181 | } |
5182 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { | ||
5183 | /* Page is swapped out. Do synthetic halt */ | ||
5184 | vcpu->arch.apf.halted = true; | ||
5185 | r = 1; | ||
5186 | goto out; | ||
5187 | } | ||
5122 | } | 5188 | } |
5123 | 5189 | ||
5124 | r = kvm_mmu_reload(vcpu); | 5190 | r = kvm_mmu_reload(vcpu); |
@@ -5247,7 +5313,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5247 | 5313 | ||
5248 | r = 1; | 5314 | r = 1; |
5249 | while (r > 0) { | 5315 | while (r > 0) { |
5250 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 5316 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
5317 | !vcpu->arch.apf.halted) | ||
5251 | r = vcpu_enter_guest(vcpu); | 5318 | r = vcpu_enter_guest(vcpu); |
5252 | else { | 5319 | else { |
5253 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5320 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
@@ -5260,6 +5327,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5260 | vcpu->arch.mp_state = | 5327 | vcpu->arch.mp_state = |
5261 | KVM_MP_STATE_RUNNABLE; | 5328 | KVM_MP_STATE_RUNNABLE; |
5262 | case KVM_MP_STATE_RUNNABLE: | 5329 | case KVM_MP_STATE_RUNNABLE: |
5330 | vcpu->arch.apf.halted = false; | ||
5263 | break; | 5331 | break; |
5264 | case KVM_MP_STATE_SIPI_RECEIVED: | 5332 | case KVM_MP_STATE_SIPI_RECEIVED: |
5265 | default: | 5333 | default: |
@@ -5281,6 +5349,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5281 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5349 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
5282 | ++vcpu->stat.request_irq_exits; | 5350 | ++vcpu->stat.request_irq_exits; |
5283 | } | 5351 | } |
5352 | |||
5353 | kvm_check_async_pf_completion(vcpu); | ||
5354 | |||
5284 | if (signal_pending(current)) { | 5355 | if (signal_pending(current)) { |
5285 | r = -EINTR; | 5356 | r = -EINTR; |
5286 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5357 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
@@ -5305,6 +5376,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5305 | int r; | 5376 | int r; |
5306 | sigset_t sigsaved; | 5377 | sigset_t sigsaved; |
5307 | 5378 | ||
5379 | if (!tsk_used_math(current) && init_fpu(current)) | ||
5380 | return -ENOMEM; | ||
5381 | |||
5308 | if (vcpu->sigset_active) | 5382 | if (vcpu->sigset_active) |
5309 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 5383 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
5310 | 5384 | ||
@@ -5316,8 +5390,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5316 | } | 5390 | } |
5317 | 5391 | ||
5318 | /* re-sync apic's tpr */ | 5392 | /* re-sync apic's tpr */ |
5319 | if (!irqchip_in_kernel(vcpu->kvm)) | 5393 | if (!irqchip_in_kernel(vcpu->kvm)) { |
5320 | kvm_set_cr8(vcpu, kvm_run->cr8); | 5394 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { |
5395 | r = -EINVAL; | ||
5396 | goto out; | ||
5397 | } | ||
5398 | } | ||
5321 | 5399 | ||
5322 | if (vcpu->arch.pio.count || vcpu->mmio_needed) { | 5400 | if (vcpu->arch.pio.count || vcpu->mmio_needed) { |
5323 | if (vcpu->mmio_needed) { | 5401 | if (vcpu->mmio_needed) { |
@@ -5326,7 +5404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5326 | vcpu->mmio_needed = 0; | 5404 | vcpu->mmio_needed = 0; |
5327 | } | 5405 | } |
5328 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 5406 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
5329 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); | 5407 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); |
5330 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5408 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
5331 | if (r != EMULATE_DONE) { | 5409 | if (r != EMULATE_DONE) { |
5332 | r = 0; | 5410 | r = 0; |
@@ -5439,7 +5517,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
5439 | 5517 | ||
5440 | sregs->cr0 = kvm_read_cr0(vcpu); | 5518 | sregs->cr0 = kvm_read_cr0(vcpu); |
5441 | sregs->cr2 = vcpu->arch.cr2; | 5519 | sregs->cr2 = vcpu->arch.cr2; |
5442 | sregs->cr3 = vcpu->arch.cr3; | 5520 | sregs->cr3 = kvm_read_cr3(vcpu); |
5443 | sregs->cr4 = kvm_read_cr4(vcpu); | 5521 | sregs->cr4 = kvm_read_cr4(vcpu); |
5444 | sregs->cr8 = kvm_get_cr8(vcpu); | 5522 | sregs->cr8 = kvm_get_cr8(vcpu); |
5445 | sregs->efer = vcpu->arch.efer; | 5523 | sregs->efer = vcpu->arch.efer; |
@@ -5507,8 +5585,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5507 | kvm_x86_ops->set_gdt(vcpu, &dt); | 5585 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5508 | 5586 | ||
5509 | vcpu->arch.cr2 = sregs->cr2; | 5587 | vcpu->arch.cr2 = sregs->cr2; |
5510 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; | 5588 | mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; |
5511 | vcpu->arch.cr3 = sregs->cr3; | 5589 | vcpu->arch.cr3 = sregs->cr3; |
5590 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
5512 | 5591 | ||
5513 | kvm_set_cr8(vcpu, sregs->cr8); | 5592 | kvm_set_cr8(vcpu, sregs->cr8); |
5514 | 5593 | ||
@@ -5522,8 +5601,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5522 | 5601 | ||
5523 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; | 5602 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
5524 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5603 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
5604 | if (sregs->cr4 & X86_CR4_OSXSAVE) | ||
5605 | update_cpuid(vcpu); | ||
5525 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5606 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5526 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); | 5607 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
5527 | mmu_reset_needed = 1; | 5608 | mmu_reset_needed = 1; |
5528 | } | 5609 | } |
5529 | 5610 | ||
@@ -5774,6 +5855,8 @@ free_vcpu: | |||
5774 | 5855 | ||
5775 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 5856 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
5776 | { | 5857 | { |
5858 | vcpu->arch.apf.msr_val = 0; | ||
5859 | |||
5777 | vcpu_load(vcpu); | 5860 | vcpu_load(vcpu); |
5778 | kvm_mmu_unload(vcpu); | 5861 | kvm_mmu_unload(vcpu); |
5779 | vcpu_put(vcpu); | 5862 | vcpu_put(vcpu); |
@@ -5793,6 +5876,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5793 | vcpu->arch.dr7 = DR7_FIXED_1; | 5876 | vcpu->arch.dr7 = DR7_FIXED_1; |
5794 | 5877 | ||
5795 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5878 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5879 | vcpu->arch.apf.msr_val = 0; | ||
5880 | |||
5881 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5882 | kvm_async_pf_hash_reset(vcpu); | ||
5883 | vcpu->arch.apf.halted = false; | ||
5796 | 5884 | ||
5797 | return kvm_x86_ops->vcpu_reset(vcpu); | 5885 | return kvm_x86_ops->vcpu_reset(vcpu); |
5798 | } | 5886 | } |
@@ -5882,6 +5970,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5882 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 5970 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) |
5883 | goto fail_free_mce_banks; | 5971 | goto fail_free_mce_banks; |
5884 | 5972 | ||
5973 | kvm_async_pf_hash_reset(vcpu); | ||
5974 | |||
5885 | return 0; | 5975 | return 0; |
5886 | fail_free_mce_banks: | 5976 | fail_free_mce_banks: |
5887 | kfree(vcpu->arch.mce_banks); | 5977 | kfree(vcpu->arch.mce_banks); |
@@ -5907,13 +5997,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
5907 | free_page((unsigned long)vcpu->arch.pio_data); | 5997 | free_page((unsigned long)vcpu->arch.pio_data); |
5908 | } | 5998 | } |
5909 | 5999 | ||
5910 | struct kvm *kvm_arch_create_vm(void) | 6000 | int kvm_arch_init_vm(struct kvm *kvm) |
5911 | { | 6001 | { |
5912 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | ||
5913 | |||
5914 | if (!kvm) | ||
5915 | return ERR_PTR(-ENOMEM); | ||
5916 | |||
5917 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 6002 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5918 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 6003 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5919 | 6004 | ||
@@ -5922,7 +6007,7 @@ struct kvm *kvm_arch_create_vm(void) | |||
5922 | 6007 | ||
5923 | spin_lock_init(&kvm->arch.tsc_write_lock); | 6008 | spin_lock_init(&kvm->arch.tsc_write_lock); |
5924 | 6009 | ||
5925 | return kvm; | 6010 | return 0; |
5926 | } | 6011 | } |
5927 | 6012 | ||
5928 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 6013 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
@@ -5940,8 +6025,10 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
5940 | /* | 6025 | /* |
5941 | * Unpin any mmu pages first. | 6026 | * Unpin any mmu pages first. |
5942 | */ | 6027 | */ |
5943 | kvm_for_each_vcpu(i, vcpu, kvm) | 6028 | kvm_for_each_vcpu(i, vcpu, kvm) { |
6029 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5944 | kvm_unload_vcpu_mmu(vcpu); | 6030 | kvm_unload_vcpu_mmu(vcpu); |
6031 | } | ||
5945 | kvm_for_each_vcpu(i, vcpu, kvm) | 6032 | kvm_for_each_vcpu(i, vcpu, kvm) |
5946 | kvm_arch_vcpu_free(vcpu); | 6033 | kvm_arch_vcpu_free(vcpu); |
5947 | 6034 | ||
@@ -5965,13 +6052,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5965 | kfree(kvm->arch.vpic); | 6052 | kfree(kvm->arch.vpic); |
5966 | kfree(kvm->arch.vioapic); | 6053 | kfree(kvm->arch.vioapic); |
5967 | kvm_free_vcpus(kvm); | 6054 | kvm_free_vcpus(kvm); |
5968 | kvm_free_physmem(kvm); | ||
5969 | if (kvm->arch.apic_access_page) | 6055 | if (kvm->arch.apic_access_page) |
5970 | put_page(kvm->arch.apic_access_page); | 6056 | put_page(kvm->arch.apic_access_page); |
5971 | if (kvm->arch.ept_identity_pagetable) | 6057 | if (kvm->arch.ept_identity_pagetable) |
5972 | put_page(kvm->arch.ept_identity_pagetable); | 6058 | put_page(kvm->arch.ept_identity_pagetable); |
5973 | cleanup_srcu_struct(&kvm->srcu); | ||
5974 | kfree(kvm); | ||
5975 | } | 6059 | } |
5976 | 6060 | ||
5977 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6061 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
@@ -6052,7 +6136,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
6052 | 6136 | ||
6053 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 6137 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
6054 | { | 6138 | { |
6055 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 6139 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6140 | !vcpu->arch.apf.halted) | ||
6141 | || !list_empty_careful(&vcpu->async_pf.done) | ||
6056 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 6142 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
6057 | || vcpu->arch.nmi_pending || | 6143 | || vcpu->arch.nmi_pending || |
6058 | (kvm_arch_interrupt_allowed(vcpu) && | 6144 | (kvm_arch_interrupt_allowed(vcpu) && |
@@ -6111,6 +6197,147 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | |||
6111 | } | 6197 | } |
6112 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 6198 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
6113 | 6199 | ||
6200 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) | ||
6201 | { | ||
6202 | int r; | ||
6203 | |||
6204 | if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || | ||
6205 | is_error_page(work->page)) | ||
6206 | return; | ||
6207 | |||
6208 | r = kvm_mmu_reload(vcpu); | ||
6209 | if (unlikely(r)) | ||
6210 | return; | ||
6211 | |||
6212 | if (!vcpu->arch.mmu.direct_map && | ||
6213 | work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu)) | ||
6214 | return; | ||
6215 | |||
6216 | vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true); | ||
6217 | } | ||
6218 | |||
6219 | static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) | ||
6220 | { | ||
6221 | return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); | ||
6222 | } | ||
6223 | |||
6224 | static inline u32 kvm_async_pf_next_probe(u32 key) | ||
6225 | { | ||
6226 | return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); | ||
6227 | } | ||
6228 | |||
6229 | static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6230 | { | ||
6231 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6232 | |||
6233 | while (vcpu->arch.apf.gfns[key] != ~0) | ||
6234 | key = kvm_async_pf_next_probe(key); | ||
6235 | |||
6236 | vcpu->arch.apf.gfns[key] = gfn; | ||
6237 | } | ||
6238 | |||
6239 | static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6240 | { | ||
6241 | int i; | ||
6242 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6243 | |||
6244 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && | ||
6245 | (vcpu->arch.apf.gfns[key] != gfn && | ||
6246 | vcpu->arch.apf.gfns[key] != ~0); i++) | ||
6247 | key = kvm_async_pf_next_probe(key); | ||
6248 | |||
6249 | return key; | ||
6250 | } | ||
6251 | |||
6252 | bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6253 | { | ||
6254 | return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn; | ||
6255 | } | ||
6256 | |||
6257 | static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6258 | { | ||
6259 | u32 i, j, k; | ||
6260 | |||
6261 | i = j = kvm_async_pf_gfn_slot(vcpu, gfn); | ||
6262 | while (true) { | ||
6263 | vcpu->arch.apf.gfns[i] = ~0; | ||
6264 | do { | ||
6265 | j = kvm_async_pf_next_probe(j); | ||
6266 | if (vcpu->arch.apf.gfns[j] == ~0) | ||
6267 | return; | ||
6268 | k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); | ||
6269 | /* | ||
6270 | * k lies cyclically in ]i,j] | ||
6271 | * | i.k.j | | ||
6272 | * |....j i.k.| or |.k..j i...| | ||
6273 | */ | ||
6274 | } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j)); | ||
6275 | vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; | ||
6276 | i = j; | ||
6277 | } | ||
6278 | } | ||
6279 | |||
6280 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) | ||
6281 | { | ||
6282 | |||
6283 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, | ||
6284 | sizeof(val)); | ||
6285 | } | ||
6286 | |||
6287 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | ||
6288 | struct kvm_async_pf *work) | ||
6289 | { | ||
6290 | struct x86_exception fault; | ||
6291 | |||
6292 | trace_kvm_async_pf_not_present(work->arch.token, work->gva); | ||
6293 | kvm_add_async_pf_gfn(vcpu, work->arch.gfn); | ||
6294 | |||
6295 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || | ||
6296 | (vcpu->arch.apf.send_user_only && | ||
6297 | kvm_x86_ops->get_cpl(vcpu) == 0)) | ||
6298 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
6299 | else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { | ||
6300 | fault.vector = PF_VECTOR; | ||
6301 | fault.error_code_valid = true; | ||
6302 | fault.error_code = 0; | ||
6303 | fault.nested_page_fault = false; | ||
6304 | fault.address = work->arch.token; | ||
6305 | kvm_inject_page_fault(vcpu, &fault); | ||
6306 | } | ||
6307 | } | ||
6308 | |||
6309 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | ||
6310 | struct kvm_async_pf *work) | ||
6311 | { | ||
6312 | struct x86_exception fault; | ||
6313 | |||
6314 | trace_kvm_async_pf_ready(work->arch.token, work->gva); | ||
6315 | if (is_error_page(work->page)) | ||
6316 | work->arch.token = ~0; /* broadcast wakeup */ | ||
6317 | else | ||
6318 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); | ||
6319 | |||
6320 | if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && | ||
6321 | !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { | ||
6322 | fault.vector = PF_VECTOR; | ||
6323 | fault.error_code_valid = true; | ||
6324 | fault.error_code = 0; | ||
6325 | fault.nested_page_fault = false; | ||
6326 | fault.address = work->arch.token; | ||
6327 | kvm_inject_page_fault(vcpu, &fault); | ||
6328 | } | ||
6329 | vcpu->arch.apf.halted = false; | ||
6330 | } | ||
6331 | |||
6332 | bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) | ||
6333 | { | ||
6334 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) | ||
6335 | return true; | ||
6336 | else | ||
6337 | return !kvm_event_needs_reinjection(vcpu) && | ||
6338 | kvm_x86_ops->interrupt_allowed(vcpu); | ||
6339 | } | ||
6340 | |||
6114 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 6341 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
6115 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 6342 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
6116 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 6343 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2cea414489f3..c600da830ce0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
70 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 70 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
71 | } | 71 | } |
72 | 72 | ||
73 | static inline u32 bit(int bitno) | ||
74 | { | ||
75 | return 1 << (bitno & 31); | ||
76 | } | ||
77 | |||
73 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
74 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
75 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); | 80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); |