diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/emulate.c | 1247 | ||||
-rw-r--r-- | arch/x86/kvm/i8259.c | 53 | ||||
-rw-r--r-- | arch/x86/kvm/irq.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_timer.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 198 | ||||
-rw-r--r-- | arch/x86/kvm/mmutrace.h | 13 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 37 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 916 | ||||
-rw-r--r-- | arch/x86/kvm/timer.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/trace.h | 165 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 297 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 1506 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 7 |
13 files changed, 2553 insertions, 1894 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4dade6ac0827..5ac0bb465ed6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
34 | 34 | ||
35 | #include "x86.h" | 35 | #include "x86.h" |
36 | #include "tss.h" | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * Opcode effective-address decode tables. | 39 | * Opcode effective-address decode tables. |
@@ -50,6 +51,8 @@ | |||
50 | #define DstReg (2<<1) /* Register operand. */ | 51 | #define DstReg (2<<1) /* Register operand. */ |
51 | #define DstMem (3<<1) /* Memory operand. */ | 52 | #define DstMem (3<<1) /* Memory operand. */ |
52 | #define DstAcc (4<<1) /* Destination Accumulator */ | 53 | #define DstAcc (4<<1) /* Destination Accumulator */ |
54 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | ||
55 | #define DstMem64 (6<<1) /* 64bit memory operand */ | ||
53 | #define DstMask (7<<1) | 56 | #define DstMask (7<<1) |
54 | /* Source operand type. */ | 57 | /* Source operand type. */ |
55 | #define SrcNone (0<<4) /* No source operand. */ | 58 | #define SrcNone (0<<4) /* No source operand. */ |
@@ -63,6 +66,7 @@ | |||
63 | #define SrcOne (7<<4) /* Implied '1' */ | 66 | #define SrcOne (7<<4) /* Implied '1' */ |
64 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
65 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | ||
66 | #define SrcMask (0xf<<4) | 70 | #define SrcMask (0xf<<4) |
67 | /* Generic ModRM decode. */ | 71 | /* Generic ModRM decode. */ |
68 | #define ModRM (1<<8) | 72 | #define ModRM (1<<8) |
@@ -85,6 +89,9 @@ | |||
85 | #define Src2ImmByte (2<<29) | 89 | #define Src2ImmByte (2<<29) |
86 | #define Src2One (3<<29) | 90 | #define Src2One (3<<29) |
87 | #define Src2Imm16 (4<<29) | 91 | #define Src2Imm16 (4<<29) |
92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
93 | in memory and second argument is located | ||
94 | immediately after the first one in memory. */ | ||
88 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
89 | 96 | ||
90 | enum { | 97 | enum { |
@@ -147,8 +154,8 @@ static u32 opcode_table[256] = { | |||
147 | 0, 0, 0, 0, | 154 | 0, 0, 0, 0, |
148 | /* 0x68 - 0x6F */ | 155 | /* 0x68 - 0x6F */ |
149 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 156 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
150 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 157 | DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ |
151 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 158 | SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ |
152 | /* 0x70 - 0x77 */ | 159 | /* 0x70 - 0x77 */ |
153 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 160 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
154 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 161 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
@@ -173,12 +180,12 @@ static u32 opcode_table[256] = { | |||
173 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
174 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
175 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
176 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
177 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
178 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
179 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
180 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
181 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
182 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
183 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 190 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
184 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 191 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
@@ -204,13 +211,13 @@ static u32 opcode_table[256] = { | |||
204 | 0, 0, 0, 0, 0, 0, 0, 0, | 211 | 0, 0, 0, 0, 0, 0, 0, 0, |
205 | /* 0xE0 - 0xE7 */ | 212 | /* 0xE0 - 0xE7 */ |
206 | 0, 0, 0, 0, | 213 | 0, 0, 0, 0, |
207 | ByteOp | SrcImmUByte, SrcImmUByte, | 214 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
208 | ByteOp | SrcImmUByte, SrcImmUByte, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
209 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
210 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
211 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
212 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
214 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
215 | 0, 0, 0, 0, | 222 | 0, 0, 0, 0, |
216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, | 223 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
@@ -343,7 +350,8 @@ static u32 group_table[] = { | |||
343 | [Group5*8] = | 350 | [Group5*8] = |
344 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
345 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, |
354 | SrcMem | ModRM | Stack, 0, | ||
347 | [Group7*8] = | 355 | [Group7*8] = |
348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
349 | SrcNone | ModRM | DstMem | Mov, 0, | 357 | SrcNone | ModRM | DstMem | Mov, 0, |
@@ -353,14 +361,14 @@ static u32 group_table[] = { | |||
353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | 361 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, |
354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | 362 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, |
355 | [Group9*8] = | 363 | [Group9*8] = |
356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | 364 | 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, |
357 | }; | 365 | }; |
358 | 366 | ||
359 | static u32 group2_table[] = { | 367 | static u32 group2_table[] = { |
360 | [Group7*8] = | 368 | [Group7*8] = |
361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, | 369 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, |
362 | SrcNone | ModRM | DstMem | Mov, 0, | 370 | SrcNone | ModRM | DstMem | Mov, 0, |
363 | SrcMem16 | ModRM | Mov, 0, | 371 | SrcMem16 | ModRM | Mov | Priv, 0, |
364 | [Group9*8] = | 372 | [Group9*8] = |
365 | 0, 0, 0, 0, 0, 0, 0, 0, | 373 | 0, 0, 0, 0, 0, 0, 0, 0, |
366 | }; | 374 | }; |
@@ -562,7 +570,7 @@ static u32 group2_table[] = { | |||
562 | #define insn_fetch(_type, _size, _eip) \ | 570 | #define insn_fetch(_type, _size, _eip) \ |
563 | ({ unsigned long _x; \ | 571 | ({ unsigned long _x; \ |
564 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ | 572 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ |
565 | if (rc != 0) \ | 573 | if (rc != X86EMUL_CONTINUE) \ |
566 | goto done; \ | 574 | goto done; \ |
567 | (_eip) += (_size); \ | 575 | (_eip) += (_size); \ |
568 | (_type)_x; \ | 576 | (_type)_x; \ |
@@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | |||
638 | 646 | ||
639 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
640 | struct x86_emulate_ops *ops, | 648 | struct x86_emulate_ops *ops, |
641 | unsigned long linear, u8 *dest) | 649 | unsigned long eip, u8 *dest) |
642 | { | 650 | { |
643 | struct fetch_cache *fc = &ctxt->decode.fetch; | 651 | struct fetch_cache *fc = &ctxt->decode.fetch; |
644 | int rc; | 652 | int rc; |
645 | int size; | 653 | int size, cur_size; |
646 | 654 | ||
647 | if (linear < fc->start || linear >= fc->end) { | 655 | if (eip == fc->end) { |
648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 656 | cur_size = fc->end - fc->start; |
649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); | 657 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
650 | if (rc) | 658 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
659 | size, ctxt->vcpu, NULL); | ||
660 | if (rc != X86EMUL_CONTINUE) | ||
651 | return rc; | 661 | return rc; |
652 | fc->start = linear; | 662 | fc->end += size; |
653 | fc->end = linear + size; | ||
654 | } | 663 | } |
655 | *dest = fc->data[linear - fc->start]; | 664 | *dest = fc->data[eip - fc->start]; |
656 | return 0; | 665 | return X86EMUL_CONTINUE; |
657 | } | 666 | } |
658 | 667 | ||
659 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 668 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
660 | struct x86_emulate_ops *ops, | 669 | struct x86_emulate_ops *ops, |
661 | unsigned long eip, void *dest, unsigned size) | 670 | unsigned long eip, void *dest, unsigned size) |
662 | { | 671 | { |
663 | int rc = 0; | 672 | int rc; |
664 | 673 | ||
665 | /* x86 instructions are limited to 15 bytes. */ | 674 | /* x86 instructions are limited to 15 bytes. */ |
666 | if (eip + size - ctxt->decode.eip_orig > 15) | 675 | if (eip + size - ctxt->eip > 15) |
667 | return X86EMUL_UNHANDLEABLE; | 676 | return X86EMUL_UNHANDLEABLE; |
668 | eip += ctxt->cs_base; | ||
669 | while (size--) { | 677 | while (size--) { |
670 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 678 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
671 | if (rc) | 679 | if (rc != X86EMUL_CONTINUE) |
672 | return rc; | 680 | return rc; |
673 | } | 681 | } |
674 | return 0; | 682 | return X86EMUL_CONTINUE; |
675 | } | 683 | } |
676 | 684 | ||
677 | /* | 685 | /* |
@@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
702 | *address = 0; | 710 | *address = 0; |
703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 711 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
704 | ctxt->vcpu, NULL); | 712 | ctxt->vcpu, NULL); |
705 | if (rc) | 713 | if (rc != X86EMUL_CONTINUE) |
706 | return rc; | 714 | return rc; |
707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 715 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
708 | ctxt->vcpu, NULL); | 716 | ctxt->vcpu, NULL); |
@@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
782 | struct decode_cache *c = &ctxt->decode; | 790 | struct decode_cache *c = &ctxt->decode; |
783 | u8 sib; | 791 | u8 sib; |
784 | int index_reg = 0, base_reg = 0, scale; | 792 | int index_reg = 0, base_reg = 0, scale; |
785 | int rc = 0; | 793 | int rc = X86EMUL_CONTINUE; |
786 | 794 | ||
787 | if (c->rex_prefix) { | 795 | if (c->rex_prefix) { |
788 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ | 796 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ |
@@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
895 | struct x86_emulate_ops *ops) | 903 | struct x86_emulate_ops *ops) |
896 | { | 904 | { |
897 | struct decode_cache *c = &ctxt->decode; | 905 | struct decode_cache *c = &ctxt->decode; |
898 | int rc = 0; | 906 | int rc = X86EMUL_CONTINUE; |
899 | 907 | ||
900 | switch (c->ad_bytes) { | 908 | switch (c->ad_bytes) { |
901 | case 2: | 909 | case 2: |
@@ -916,14 +924,18 @@ int | |||
916 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 924 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
917 | { | 925 | { |
918 | struct decode_cache *c = &ctxt->decode; | 926 | struct decode_cache *c = &ctxt->decode; |
919 | int rc = 0; | 927 | int rc = X86EMUL_CONTINUE; |
920 | int mode = ctxt->mode; | 928 | int mode = ctxt->mode; |
921 | int def_op_bytes, def_ad_bytes, group; | 929 | int def_op_bytes, def_ad_bytes, group; |
922 | 930 | ||
923 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
924 | 931 | ||
932 | /* we cannot decode insn before we complete previous rep insn */ | ||
933 | WARN_ON(ctxt->restart); | ||
934 | |||
935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
925 | memset(c, 0, sizeof(struct decode_cache)); | 936 | memset(c, 0, sizeof(struct decode_cache)); |
926 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); | 937 | c->eip = ctxt->eip; |
938 | c->fetch.start = c->fetch.end = c->eip; | ||
927 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
928 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
929 | 941 | ||
@@ -1015,11 +1027,6 @@ done_prefixes: | |||
1015 | } | 1027 | } |
1016 | } | 1028 | } |
1017 | 1029 | ||
1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
1020 | return -1; | ||
1021 | } | ||
1022 | |||
1023 | if (c->d & Group) { | 1030 | if (c->d & Group) { |
1024 | group = c->d & GroupMask; | 1031 | group = c->d & GroupMask; |
1025 | c->modrm = insn_fetch(u8, 1, c->eip); | 1032 | c->modrm = insn_fetch(u8, 1, c->eip); |
@@ -1046,7 +1053,7 @@ done_prefixes: | |||
1046 | rc = decode_modrm(ctxt, ops); | 1053 | rc = decode_modrm(ctxt, ops); |
1047 | else if (c->d & MemAbs) | 1054 | else if (c->d & MemAbs) |
1048 | rc = decode_abs(ctxt, ops); | 1055 | rc = decode_abs(ctxt, ops); |
1049 | if (rc) | 1056 | if (rc != X86EMUL_CONTINUE) |
1050 | goto done; | 1057 | goto done; |
1051 | 1058 | ||
1052 | if (!c->has_seg_override) | 1059 | if (!c->has_seg_override) |
@@ -1057,6 +1064,10 @@ done_prefixes: | |||
1057 | 1064 | ||
1058 | if (c->ad_bytes != 8) | 1065 | if (c->ad_bytes != 8) |
1059 | c->modrm_ea = (u32)c->modrm_ea; | 1066 | c->modrm_ea = (u32)c->modrm_ea; |
1067 | |||
1068 | if (c->rip_relative) | ||
1069 | c->modrm_ea += c->eip; | ||
1070 | |||
1060 | /* | 1071 | /* |
1061 | * Decode and fetch the source operand: register, memory | 1072 | * Decode and fetch the source operand: register, memory |
1062 | * or immediate. | 1073 | * or immediate. |
@@ -1091,6 +1102,8 @@ done_prefixes: | |||
1091 | break; | 1102 | break; |
1092 | } | 1103 | } |
1093 | c->src.type = OP_MEM; | 1104 | c->src.type = OP_MEM; |
1105 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
1106 | c->src.val = 0; | ||
1094 | break; | 1107 | break; |
1095 | case SrcImm: | 1108 | case SrcImm: |
1096 | case SrcImmU: | 1109 | case SrcImmU: |
@@ -1139,6 +1152,14 @@ done_prefixes: | |||
1139 | c->src.bytes = 1; | 1152 | c->src.bytes = 1; |
1140 | c->src.val = 1; | 1153 | c->src.val = 1; |
1141 | break; | 1154 | break; |
1155 | case SrcSI: | ||
1156 | c->src.type = OP_MEM; | ||
1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1158 | c->src.ptr = (unsigned long *) | ||
1159 | register_address(c, seg_override_base(ctxt, c), | ||
1160 | c->regs[VCPU_REGS_RSI]); | ||
1161 | c->src.val = 0; | ||
1162 | break; | ||
1142 | } | 1163 | } |
1143 | 1164 | ||
1144 | /* | 1165 | /* |
@@ -1168,6 +1189,12 @@ done_prefixes: | |||
1168 | c->src2.bytes = 1; | 1189 | c->src2.bytes = 1; |
1169 | c->src2.val = 1; | 1190 | c->src2.val = 1; |
1170 | break; | 1191 | break; |
1192 | case Src2Mem16: | ||
1193 | c->src2.type = OP_MEM; | ||
1194 | c->src2.bytes = 2; | ||
1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
1196 | c->src2.val = 0; | ||
1197 | break; | ||
1171 | } | 1198 | } |
1172 | 1199 | ||
1173 | /* Decode and fetch the destination operand: register or memory. */ | 1200 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1180,6 +1207,7 @@ done_prefixes: | |||
1180 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 1207 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
1181 | break; | 1208 | break; |
1182 | case DstMem: | 1209 | case DstMem: |
1210 | case DstMem64: | ||
1183 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1211 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1184 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1212 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1185 | c->dst.type = OP_REG; | 1213 | c->dst.type = OP_REG; |
@@ -1188,12 +1216,24 @@ done_prefixes: | |||
1188 | break; | 1216 | break; |
1189 | } | 1217 | } |
1190 | c->dst.type = OP_MEM; | 1218 | c->dst.type = OP_MEM; |
1219 | c->dst.ptr = (unsigned long *)c->modrm_ea; | ||
1220 | if ((c->d & DstMask) == DstMem64) | ||
1221 | c->dst.bytes = 8; | ||
1222 | else | ||
1223 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1224 | c->dst.val = 0; | ||
1225 | if (c->d & BitOp) { | ||
1226 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | ||
1227 | |||
1228 | c->dst.ptr = (void *)c->dst.ptr + | ||
1229 | (c->src.val & mask) / 8; | ||
1230 | } | ||
1191 | break; | 1231 | break; |
1192 | case DstAcc: | 1232 | case DstAcc: |
1193 | c->dst.type = OP_REG; | 1233 | c->dst.type = OP_REG; |
1194 | c->dst.bytes = c->op_bytes; | 1234 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1195 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | 1235 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; |
1196 | switch (c->op_bytes) { | 1236 | switch (c->dst.bytes) { |
1197 | case 1: | 1237 | case 1: |
1198 | c->dst.val = *(u8 *)c->dst.ptr; | 1238 | c->dst.val = *(u8 *)c->dst.ptr; |
1199 | break; | 1239 | break; |
@@ -1203,18 +1243,248 @@ done_prefixes: | |||
1203 | case 4: | 1243 | case 4: |
1204 | c->dst.val = *(u32 *)c->dst.ptr; | 1244 | c->dst.val = *(u32 *)c->dst.ptr; |
1205 | break; | 1245 | break; |
1246 | case 8: | ||
1247 | c->dst.val = *(u64 *)c->dst.ptr; | ||
1248 | break; | ||
1206 | } | 1249 | } |
1207 | c->dst.orig_val = c->dst.val; | 1250 | c->dst.orig_val = c->dst.val; |
1208 | break; | 1251 | break; |
1252 | case DstDI: | ||
1253 | c->dst.type = OP_MEM; | ||
1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1255 | c->dst.ptr = (unsigned long *) | ||
1256 | register_address(c, es_base(ctxt), | ||
1257 | c->regs[VCPU_REGS_RDI]); | ||
1258 | c->dst.val = 0; | ||
1259 | break; | ||
1209 | } | 1260 | } |
1210 | 1261 | ||
1211 | if (c->rip_relative) | ||
1212 | c->modrm_ea += c->eip; | ||
1213 | |||
1214 | done: | 1262 | done: |
1215 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1216 | } | 1264 | } |
1217 | 1265 | ||
1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | ||
1267 | struct x86_emulate_ops *ops, | ||
1268 | unsigned int size, unsigned short port, | ||
1269 | void *dest) | ||
1270 | { | ||
1271 | struct read_cache *rc = &ctxt->decode.io_read; | ||
1272 | |||
1273 | if (rc->pos == rc->end) { /* refill pio read ahead */ | ||
1274 | struct decode_cache *c = &ctxt->decode; | ||
1275 | unsigned int in_page, n; | ||
1276 | unsigned int count = c->rep_prefix ? | ||
1277 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | ||
1278 | in_page = (ctxt->eflags & EFLG_DF) ? | ||
1279 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | ||
1280 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | ||
1281 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | ||
1282 | count); | ||
1283 | if (n == 0) | ||
1284 | n = 1; | ||
1285 | rc->pos = rc->end = 0; | ||
1286 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | ||
1287 | return 0; | ||
1288 | rc->end = n * size; | ||
1289 | } | ||
1290 | |||
1291 | memcpy(dest, rc->data + rc->pos, size); | ||
1292 | rc->pos += size; | ||
1293 | return 1; | ||
1294 | } | ||
1295 | |||
1296 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
1297 | { | ||
1298 | u32 limit = get_desc_limit(desc); | ||
1299 | |||
1300 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
1301 | } | ||
1302 | |||
1303 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | ||
1304 | struct x86_emulate_ops *ops, | ||
1305 | u16 selector, struct desc_ptr *dt) | ||
1306 | { | ||
1307 | if (selector & 1 << 2) { | ||
1308 | struct desc_struct desc; | ||
1309 | memset (dt, 0, sizeof *dt); | ||
1310 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | ||
1311 | return; | ||
1312 | |||
1313 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | ||
1314 | dt->address = get_desc_base(&desc); | ||
1315 | } else | ||
1316 | ops->get_gdt(dt, ctxt->vcpu); | ||
1317 | } | ||
1318 | |||
1319 | /* only valid for 8-byte segment descriptors */ | |
1320 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1321 | struct x86_emulate_ops *ops, | ||
1322 | u16 selector, struct desc_struct *desc) | ||
1323 | { | ||
1324 | struct desc_ptr dt; | ||
1325 | u16 index = selector >> 3; | ||
1326 | int ret; | ||
1327 | u32 err; | ||
1328 | ulong addr; | ||
1329 | |||
1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1331 | |||
1332 | if (dt.size < index * 8 + 7) { | ||
1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1334 | return X86EMUL_PROPAGATE_FAULT; | ||
1335 | } | ||
1336 | addr = dt.address + index * 8; | ||
1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1340 | |||
1341 | return ret; | ||
1342 | } | ||
1343 | |||
1344 | /* only valid for 8-byte segment descriptors */ | |
1345 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1346 | struct x86_emulate_ops *ops, | ||
1347 | u16 selector, struct desc_struct *desc) | ||
1348 | { | ||
1349 | struct desc_ptr dt; | ||
1350 | u16 index = selector >> 3; | ||
1351 | u32 err; | ||
1352 | ulong addr; | ||
1353 | int ret; | ||
1354 | |||
1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1356 | |||
1357 | if (dt.size < index * 8 + 7) { | ||
1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1359 | return X86EMUL_PROPAGATE_FAULT; | ||
1360 | } | ||
1361 | |||
1362 | addr = dt.address + index * 8; | ||
1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1366 | |||
1367 | return ret; | ||
1368 | } | ||
1369 | |||
1370 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1371 | struct x86_emulate_ops *ops, | ||
1372 | u16 selector, int seg) | ||
1373 | { | ||
1374 | struct desc_struct seg_desc; | ||
1375 | u8 dpl, rpl, cpl; | ||
1376 | unsigned err_vec = GP_VECTOR; | ||
1377 | u32 err_code = 0; | ||
1378 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
1379 | int ret; | ||
1380 | |||
1381 | memset(&seg_desc, 0, sizeof seg_desc); | ||
1382 | |||
1383 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | ||
1384 | || ctxt->mode == X86EMUL_MODE_REAL) { | ||
1385 | /* set real mode segment descriptor */ | ||
1386 | set_desc_base(&seg_desc, selector << 4); | ||
1387 | set_desc_limit(&seg_desc, 0xffff); | ||
1388 | seg_desc.type = 3; | ||
1389 | seg_desc.p = 1; | ||
1390 | seg_desc.s = 1; | ||
1391 | goto load; | ||
1392 | } | ||
1393 | |||
1394 | /* NULL selector is not valid for TR, CS and SS */ | ||
1395 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | ||
1396 | && null_selector) | ||
1397 | goto exception; | ||
1398 | |||
1399 | /* TR should be in GDT only */ | ||
1400 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
1401 | goto exception; | ||
1402 | |||
1403 | if (null_selector) /* for NULL selector skip all following checks */ | ||
1404 | goto load; | ||
1405 | |||
1406 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1407 | if (ret != X86EMUL_CONTINUE) | ||
1408 | return ret; | ||
1409 | |||
1410 | err_code = selector & 0xfffc; | ||
1411 | err_vec = GP_VECTOR; | ||
1412 | |||
1413 | /* can't load system descriptor into segment selector */ | |
1414 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | ||
1415 | goto exception; | ||
1416 | |||
1417 | if (!seg_desc.p) { | ||
1418 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
1419 | goto exception; | ||
1420 | } | ||
1421 | |||
1422 | rpl = selector & 3; | ||
1423 | dpl = seg_desc.dpl; | ||
1424 | cpl = ops->cpl(ctxt->vcpu); | ||
1425 | |||
1426 | switch (seg) { | ||
1427 | case VCPU_SREG_SS: | ||
1428 | /* | ||
1429 | * segment is not a writable data segment or segment | ||
1430 | * selector's RPL != CPL or segment descriptor's DPL != CPL | |
1431 | */ | ||
1432 | if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) | ||
1433 | goto exception; | ||
1434 | break; | ||
1435 | case VCPU_SREG_CS: | ||
1436 | if (!(seg_desc.type & 8)) | ||
1437 | goto exception; | ||
1438 | |||
1439 | if (seg_desc.type & 4) { | ||
1440 | /* conforming */ | ||
1441 | if (dpl > cpl) | ||
1442 | goto exception; | ||
1443 | } else { | ||
1444 | /* nonconforming */ | ||
1445 | if (rpl > cpl || dpl != cpl) | ||
1446 | goto exception; | ||
1447 | } | ||
1448 | /* CS(RPL) <- CPL */ | ||
1449 | selector = (selector & 0xfffc) | cpl; | ||
1450 | break; | ||
1451 | case VCPU_SREG_TR: | ||
1452 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | ||
1453 | goto exception; | ||
1454 | break; | ||
1455 | case VCPU_SREG_LDTR: | ||
1456 | if (seg_desc.s || seg_desc.type != 2) | ||
1457 | goto exception; | ||
1458 | break; | ||
1459 | default: /* DS, ES, FS, or GS */ | ||
1460 | /* | ||
1461 | * segment is not a data or readable code segment or | ||
1462 | * ((segment is a data or nonconforming code segment) | ||
1463 | * and (both RPL and CPL > DPL)) | ||
1464 | */ | ||
1465 | if ((seg_desc.type & 0xa) == 0x8 || | ||
1466 | (((seg_desc.type & 0xc) != 0xc) && | ||
1467 | (rpl > dpl && cpl > dpl))) | ||
1468 | goto exception; | ||
1469 | break; | ||
1470 | } | ||
1471 | |||
1472 | if (seg_desc.s) { | ||
1473 | /* mark segment as accessed */ | ||
1474 | seg_desc.type |= 1; | ||
1475 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1476 | if (ret != X86EMUL_CONTINUE) | ||
1477 | return ret; | ||
1478 | } | ||
1479 | load: | ||
1480 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | ||
1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | ||
1482 | return X86EMUL_CONTINUE; | ||
1483 | exception: | ||
1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | ||
1485 | return X86EMUL_PROPAGATE_FAULT; | ||
1486 | } | ||
1487 | |||
1218 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) |
1219 | { | 1489 | { |
1220 | struct decode_cache *c = &ctxt->decode; | 1490 | struct decode_cache *c = &ctxt->decode; |
@@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1251 | int rc; | 1521 | int rc; |
1252 | unsigned long val, change_mask; | 1522 | unsigned long val, change_mask; |
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1523 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | 1524 | int cpl = ops->cpl(ctxt->vcpu); |
1255 | 1525 | ||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | 1526 | rc = emulate_pop(ctxt, ops, &val, len); |
1257 | if (rc != X86EMUL_CONTINUE) | 1527 | if (rc != X86EMUL_CONTINUE) |
@@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1306 | int rc; | 1576 | int rc; |
1307 | 1577 | ||
1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1578 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); |
1309 | if (rc != 0) | 1579 | if (rc != X86EMUL_CONTINUE) |
1310 | return rc; | 1580 | return rc; |
1311 | 1581 | ||
1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | 1582 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); |
1313 | return rc; | 1583 | return rc; |
1314 | } | 1584 | } |
1315 | 1585 | ||
@@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1332 | struct x86_emulate_ops *ops) | 1602 | struct x86_emulate_ops *ops) |
1333 | { | 1603 | { |
1334 | struct decode_cache *c = &ctxt->decode; | 1604 | struct decode_cache *c = &ctxt->decode; |
1335 | int rc = 0; | 1605 | int rc = X86EMUL_CONTINUE; |
1336 | int reg = VCPU_REGS_RDI; | 1606 | int reg = VCPU_REGS_RDI; |
1337 | 1607 | ||
1338 | while (reg >= VCPU_REGS_RAX) { | 1608 | while (reg >= VCPU_REGS_RAX) { |
@@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1343 | } | 1613 | } |
1344 | 1614 | ||
1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1615 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); |
1346 | if (rc != 0) | 1616 | if (rc != X86EMUL_CONTINUE) |
1347 | break; | 1617 | break; |
1348 | --reg; | 1618 | --reg; |
1349 | } | 1619 | } |
@@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
1354 | struct x86_emulate_ops *ops) | 1624 | struct x86_emulate_ops *ops) |
1355 | { | 1625 | { |
1356 | struct decode_cache *c = &ctxt->decode; | 1626 | struct decode_cache *c = &ctxt->decode; |
1357 | int rc; | ||
1358 | 1627 | ||
1359 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1628 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); |
1360 | if (rc != 0) | ||
1361 | return rc; | ||
1362 | return 0; | ||
1363 | } | 1629 | } |
1364 | 1630 | ||
1365 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1631 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) |
@@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1395 | struct x86_emulate_ops *ops) | 1661 | struct x86_emulate_ops *ops) |
1396 | { | 1662 | { |
1397 | struct decode_cache *c = &ctxt->decode; | 1663 | struct decode_cache *c = &ctxt->decode; |
1398 | int rc = 0; | ||
1399 | 1664 | ||
1400 | switch (c->modrm_reg) { | 1665 | switch (c->modrm_reg) { |
1401 | case 0 ... 1: /* test */ | 1666 | case 0 ... 1: /* test */ |
@@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1408 | emulate_1op("neg", c->dst, ctxt->eflags); | 1673 | emulate_1op("neg", c->dst, ctxt->eflags); |
1409 | break; | 1674 | break; |
1410 | default: | 1675 | default: |
1411 | DPRINTF("Cannot emulate %02x\n", c->b); | 1676 | return 0; |
1412 | rc = X86EMUL_UNHANDLEABLE; | ||
1413 | break; | ||
1414 | } | 1677 | } |
1415 | return rc; | 1678 | return 1; |
1416 | } | 1679 | } |
1417 | 1680 | ||
1418 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1681 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, |
@@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1442 | emulate_push(ctxt); | 1705 | emulate_push(ctxt); |
1443 | break; | 1706 | break; |
1444 | } | 1707 | } |
1445 | return 0; | 1708 | return X86EMUL_CONTINUE; |
1446 | } | 1709 | } |
1447 | 1710 | ||
1448 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1711 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, |
1449 | struct x86_emulate_ops *ops, | 1712 | struct x86_emulate_ops *ops) |
1450 | unsigned long memop) | ||
1451 | { | 1713 | { |
1452 | struct decode_cache *c = &ctxt->decode; | 1714 | struct decode_cache *c = &ctxt->decode; |
1453 | u64 old, new; | 1715 | u64 old = c->dst.orig_val; |
1454 | int rc; | ||
1455 | |||
1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | ||
1457 | if (rc != X86EMUL_CONTINUE) | ||
1458 | return rc; | ||
1459 | 1716 | ||
1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1717 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
1461 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1718 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { |
@@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1463 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1720 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
1464 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1721 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
1465 | ctxt->eflags &= ~EFLG_ZF; | 1722 | ctxt->eflags &= ~EFLG_ZF; |
1466 | |||
1467 | } else { | 1723 | } else { |
1468 | new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1724 | c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | |
1469 | (u32) c->regs[VCPU_REGS_RBX]; | 1725 | (u32) c->regs[VCPU_REGS_RBX]; |
1470 | 1726 | ||
1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | ||
1472 | if (rc != X86EMUL_CONTINUE) | ||
1473 | return rc; | ||
1474 | ctxt->eflags |= EFLG_ZF; | 1727 | ctxt->eflags |= EFLG_ZF; |
1475 | } | 1728 | } |
1476 | return 0; | 1729 | return X86EMUL_CONTINUE; |
1477 | } | 1730 | } |
1478 | 1731 | ||
1479 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1732 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, |
@@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1484 | unsigned long cs; | 1737 | unsigned long cs; |
1485 | 1738 | ||
1486 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1739 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); |
1487 | if (rc) | 1740 | if (rc != X86EMUL_CONTINUE) |
1488 | return rc; | 1741 | return rc; |
1489 | if (c->op_bytes == 4) | 1742 | if (c->op_bytes == 4) |
1490 | c->eip = (u32)c->eip; | 1743 | c->eip = (u32)c->eip; |
1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1744 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1492 | if (rc) | 1745 | if (rc != X86EMUL_CONTINUE) |
1493 | return rc; | 1746 | return rc; |
1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); | 1747 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
1495 | return rc; | 1748 | return rc; |
1496 | } | 1749 | } |
1497 | 1750 | ||
@@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1544 | default: | 1797 | default: |
1545 | break; | 1798 | break; |
1546 | } | 1799 | } |
1547 | return 0; | 1800 | return X86EMUL_CONTINUE; |
1548 | } | 1801 | } |
1549 | 1802 | ||
1550 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
@@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1598 | u64 msr_data; | 1851 | u64 msr_data; |
1599 | 1852 | ||
1600 | /* syscall is not available in real mode */ | 1853 | /* syscall is not available in real mode */ |
1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) | 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1602 | return X86EMUL_UNHANDLEABLE; | 1855 | ctxt->mode == X86EMUL_MODE_VM86) { |
1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1857 | return X86EMUL_PROPAGATE_FAULT; | ||
1858 | } | ||
1603 | 1859 | ||
1604 | setup_syscalls_segments(ctxt, &cs, &ss); | 1860 | setup_syscalls_segments(ctxt, &cs, &ss); |
1605 | 1861 | ||
@@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1649 | /* inject #GP if in real mode */ | 1905 | /* inject #GP if in real mode */ |
1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1651 | kvm_inject_gp(ctxt->vcpu, 0); | 1907 | kvm_inject_gp(ctxt->vcpu, 0); |
1652 | return X86EMUL_UNHANDLEABLE; | 1908 | return X86EMUL_PROPAGATE_FAULT; |
1653 | } | 1909 | } |
1654 | 1910 | ||
1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1911 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1656 | * Therefore, we inject an #UD. | 1912 | * Therefore, we inject an #UD. |
1657 | */ | 1913 | */ |
1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1659 | return X86EMUL_UNHANDLEABLE; | 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1916 | return X86EMUL_PROPAGATE_FAULT; | ||
1917 | } | ||
1660 | 1918 | ||
1661 | setup_syscalls_segments(ctxt, &cs, &ss); | 1919 | setup_syscalls_segments(ctxt, &cs, &ss); |
1662 | 1920 | ||
@@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1711 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1712 | ctxt->mode == X86EMUL_MODE_VM86) { | 1970 | ctxt->mode == X86EMUL_MODE_VM86) { |
1713 | kvm_inject_gp(ctxt->vcpu, 0); | 1971 | kvm_inject_gp(ctxt->vcpu, 0); |
1714 | return X86EMUL_UNHANDLEABLE; | 1972 | return X86EMUL_PROPAGATE_FAULT; |
1715 | } | 1973 | } |
1716 | 1974 | ||
1717 | setup_syscalls_segments(ctxt, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1756 | return X86EMUL_CONTINUE; | 2014 | return X86EMUL_CONTINUE; |
1757 | } | 2015 | } |
1758 | 2016 | ||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | 2017 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, |
2018 | struct x86_emulate_ops *ops) | ||
1760 | { | 2019 | { |
1761 | int iopl; | 2020 | int iopl; |
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
@@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
1765 | return true; | 2024 | return true; |
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | 2026 | return ops->cpl(ctxt->vcpu) > iopl; |
1768 | } | 2027 | } |
1769 | 2028 | ||
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
@@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
1801 | struct x86_emulate_ops *ops, | 2060 | struct x86_emulate_ops *ops, |
1802 | u16 port, u16 len) | 2061 | u16 port, u16 len) |
1803 | { | 2062 | { |
1804 | if (emulator_bad_iopl(ctxt)) | 2063 | if (emulator_bad_iopl(ctxt, ops)) |
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2064 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) |
1806 | return false; | 2065 | return false; |
1807 | return true; | 2066 | return true; |
1808 | } | 2067 | } |
1809 | 2068 | ||
2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
2070 | struct x86_emulate_ops *ops, | ||
2071 | int seg) | ||
2072 | { | ||
2073 | struct desc_struct desc; | ||
2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
2075 | return get_desc_base(&desc); | ||
2076 | else | ||
2077 | return ~0; | ||
2078 | } | ||
2079 | |||
2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | ||
2081 | struct x86_emulate_ops *ops, | ||
2082 | struct tss_segment_16 *tss) | ||
2083 | { | ||
2084 | struct decode_cache *c = &ctxt->decode; | ||
2085 | |||
2086 | tss->ip = c->eip; | ||
2087 | tss->flag = ctxt->eflags; | ||
2088 | tss->ax = c->regs[VCPU_REGS_RAX]; | ||
2089 | tss->cx = c->regs[VCPU_REGS_RCX]; | ||
2090 | tss->dx = c->regs[VCPU_REGS_RDX]; | ||
2091 | tss->bx = c->regs[VCPU_REGS_RBX]; | ||
2092 | tss->sp = c->regs[VCPU_REGS_RSP]; | ||
2093 | tss->bp = c->regs[VCPU_REGS_RBP]; | ||
2094 | tss->si = c->regs[VCPU_REGS_RSI]; | ||
2095 | tss->di = c->regs[VCPU_REGS_RDI]; | ||
2096 | |||
2097 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2098 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2099 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2100 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2101 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2102 | } | ||
2103 | |||
2104 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | ||
2105 | struct x86_emulate_ops *ops, | ||
2106 | struct tss_segment_16 *tss) | ||
2107 | { | ||
2108 | struct decode_cache *c = &ctxt->decode; | ||
2109 | int ret; | ||
2110 | |||
2111 | c->eip = tss->ip; | ||
2112 | ctxt->eflags = tss->flag | 2; | ||
2113 | c->regs[VCPU_REGS_RAX] = tss->ax; | ||
2114 | c->regs[VCPU_REGS_RCX] = tss->cx; | ||
2115 | c->regs[VCPU_REGS_RDX] = tss->dx; | ||
2116 | c->regs[VCPU_REGS_RBX] = tss->bx; | ||
2117 | c->regs[VCPU_REGS_RSP] = tss->sp; | ||
2118 | c->regs[VCPU_REGS_RBP] = tss->bp; | ||
2119 | c->regs[VCPU_REGS_RSI] = tss->si; | ||
2120 | c->regs[VCPU_REGS_RDI] = tss->di; | ||
2121 | |||
2122 | /* | ||
2123 | * SDM says that segment selectors are loaded before segment | ||
2124 | * descriptors | ||
2125 | */ | ||
2126 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2127 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2128 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2129 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2130 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2131 | |||
2132 | /* | ||
2133 | * Now load segment descriptors. If fault happenes at this stage | ||
2134 | * it is handled in a context of new task | ||
2135 | */ | ||
2136 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | ||
2137 | if (ret != X86EMUL_CONTINUE) | ||
2138 | return ret; | ||
2139 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2140 | if (ret != X86EMUL_CONTINUE) | ||
2141 | return ret; | ||
2142 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2143 | if (ret != X86EMUL_CONTINUE) | ||
2144 | return ret; | ||
2145 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2146 | if (ret != X86EMUL_CONTINUE) | ||
2147 | return ret; | ||
2148 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2149 | if (ret != X86EMUL_CONTINUE) | ||
2150 | return ret; | ||
2151 | |||
2152 | return X86EMUL_CONTINUE; | ||
2153 | } | ||
2154 | |||
2155 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | ||
2156 | struct x86_emulate_ops *ops, | ||
2157 | u16 tss_selector, u16 old_tss_sel, | ||
2158 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2159 | { | ||
2160 | struct tss_segment_16 tss_seg; | ||
2161 | int ret; | ||
2162 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2163 | |||
2164 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2165 | &err); | ||
2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2167 | /* FIXME: need to provide precise fault address */ | ||
2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2169 | return ret; | ||
2170 | } | ||
2171 | |||
2172 | save_state_to_tss16(ctxt, ops, &tss_seg); | ||
2173 | |||
2174 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2175 | &err); | ||
2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2177 | /* FIXME: need to provide precise fault address */ | ||
2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2179 | return ret; | ||
2180 | } | ||
2181 | |||
2182 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2183 | &err); | ||
2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2185 | /* FIXME: need to provide precise fault address */ | ||
2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2187 | return ret; | ||
2188 | } | ||
2189 | |||
2190 | if (old_tss_sel != 0xffff) { | ||
2191 | tss_seg.prev_task_link = old_tss_sel; | ||
2192 | |||
2193 | ret = ops->write_std(new_tss_base, | ||
2194 | &tss_seg.prev_task_link, | ||
2195 | sizeof tss_seg.prev_task_link, | ||
2196 | ctxt->vcpu, &err); | ||
2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2198 | /* FIXME: need to provide precise fault address */ | ||
2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2200 | return ret; | ||
2201 | } | ||
2202 | } | ||
2203 | |||
2204 | return load_state_from_tss16(ctxt, ops, &tss_seg); | ||
2205 | } | ||
2206 | |||
2207 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | ||
2208 | struct x86_emulate_ops *ops, | ||
2209 | struct tss_segment_32 *tss) | ||
2210 | { | ||
2211 | struct decode_cache *c = &ctxt->decode; | ||
2212 | |||
2213 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | ||
2214 | tss->eip = c->eip; | ||
2215 | tss->eflags = ctxt->eflags; | ||
2216 | tss->eax = c->regs[VCPU_REGS_RAX]; | ||
2217 | tss->ecx = c->regs[VCPU_REGS_RCX]; | ||
2218 | tss->edx = c->regs[VCPU_REGS_RDX]; | ||
2219 | tss->ebx = c->regs[VCPU_REGS_RBX]; | ||
2220 | tss->esp = c->regs[VCPU_REGS_RSP]; | ||
2221 | tss->ebp = c->regs[VCPU_REGS_RBP]; | ||
2222 | tss->esi = c->regs[VCPU_REGS_RSI]; | ||
2223 | tss->edi = c->regs[VCPU_REGS_RDI]; | ||
2224 | |||
2225 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2226 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2227 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2228 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2229 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | ||
2230 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | ||
2231 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2232 | } | ||
2233 | |||
2234 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | ||
2235 | struct x86_emulate_ops *ops, | ||
2236 | struct tss_segment_32 *tss) | ||
2237 | { | ||
2238 | struct decode_cache *c = &ctxt->decode; | ||
2239 | int ret; | ||
2240 | |||
2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | ||
2242 | c->eip = tss->eip; | ||
2243 | ctxt->eflags = tss->eflags | 2; | ||
2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | ||
2245 | c->regs[VCPU_REGS_RCX] = tss->ecx; | ||
2246 | c->regs[VCPU_REGS_RDX] = tss->edx; | ||
2247 | c->regs[VCPU_REGS_RBX] = tss->ebx; | ||
2248 | c->regs[VCPU_REGS_RSP] = tss->esp; | ||
2249 | c->regs[VCPU_REGS_RBP] = tss->ebp; | ||
2250 | c->regs[VCPU_REGS_RSI] = tss->esi; | ||
2251 | c->regs[VCPU_REGS_RDI] = tss->edi; | ||
2252 | |||
2253 | /* | ||
2254 | * SDM says that segment selectors are loaded before segment | ||
2255 | * descriptors | ||
2256 | */ | ||
2257 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2258 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2259 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2260 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2261 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2262 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | ||
2263 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | ||
2264 | |||
2265 | /* | ||
2266 | * Now load segment descriptors. If fault happenes at this stage | ||
2267 | * it is handled in a context of new task | ||
2268 | */ | ||
2269 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | ||
2270 | if (ret != X86EMUL_CONTINUE) | ||
2271 | return ret; | ||
2272 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2273 | if (ret != X86EMUL_CONTINUE) | ||
2274 | return ret; | ||
2275 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2276 | if (ret != X86EMUL_CONTINUE) | ||
2277 | return ret; | ||
2278 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2279 | if (ret != X86EMUL_CONTINUE) | ||
2280 | return ret; | ||
2281 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2282 | if (ret != X86EMUL_CONTINUE) | ||
2283 | return ret; | ||
2284 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | ||
2285 | if (ret != X86EMUL_CONTINUE) | ||
2286 | return ret; | ||
2287 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | ||
2288 | if (ret != X86EMUL_CONTINUE) | ||
2289 | return ret; | ||
2290 | |||
2291 | return X86EMUL_CONTINUE; | ||
2292 | } | ||
2293 | |||
2294 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | ||
2295 | struct x86_emulate_ops *ops, | ||
2296 | u16 tss_selector, u16 old_tss_sel, | ||
2297 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2298 | { | ||
2299 | struct tss_segment_32 tss_seg; | ||
2300 | int ret; | ||
2301 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2302 | |||
2303 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2304 | &err); | ||
2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2306 | /* FIXME: need to provide precise fault address */ | ||
2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2308 | return ret; | ||
2309 | } | ||
2310 | |||
2311 | save_state_to_tss32(ctxt, ops, &tss_seg); | ||
2312 | |||
2313 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2314 | &err); | ||
2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2316 | /* FIXME: need to provide precise fault address */ | ||
2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2318 | return ret; | ||
2319 | } | ||
2320 | |||
2321 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2322 | &err); | ||
2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2324 | /* FIXME: need to provide precise fault address */ | ||
2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2326 | return ret; | ||
2327 | } | ||
2328 | |||
2329 | if (old_tss_sel != 0xffff) { | ||
2330 | tss_seg.prev_task_link = old_tss_sel; | ||
2331 | |||
2332 | ret = ops->write_std(new_tss_base, | ||
2333 | &tss_seg.prev_task_link, | ||
2334 | sizeof tss_seg.prev_task_link, | ||
2335 | ctxt->vcpu, &err); | ||
2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2337 | /* FIXME: need to provide precise fault address */ | ||
2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2339 | return ret; | ||
2340 | } | ||
2341 | } | ||
2342 | |||
2343 | return load_state_from_tss32(ctxt, ops, &tss_seg); | ||
2344 | } | ||
2345 | |||
2346 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2347 | struct x86_emulate_ops *ops, | ||
2348 | u16 tss_selector, int reason, | ||
2349 | bool has_error_code, u32 error_code) | ||
2350 | { | ||
2351 | struct desc_struct curr_tss_desc, next_tss_desc; | ||
2352 | int ret; | ||
2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | ||
2354 | ulong old_tss_base = | ||
2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | ||
2356 | u32 desc_limit; | ||
2357 | |||
2358 | /* FIXME: old_tss_base == ~0 ? */ | ||
2359 | |||
2360 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | ||
2361 | if (ret != X86EMUL_CONTINUE) | ||
2362 | return ret; | ||
2363 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | ||
2364 | if (ret != X86EMUL_CONTINUE) | ||
2365 | return ret; | ||
2366 | |||
2367 | /* FIXME: check that next_tss_desc is tss */ | ||
2368 | |||
2369 | if (reason != TASK_SWITCH_IRET) { | ||
2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | ||
2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | ||
2372 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2373 | return X86EMUL_PROPAGATE_FAULT; | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | desc_limit = desc_limit_scaled(&next_tss_desc); | ||
2378 | if (!next_tss_desc.p || | ||
2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | ||
2380 | desc_limit < 0x2b)) { | ||
2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | ||
2382 | tss_selector & 0xfffc); | ||
2383 | return X86EMUL_PROPAGATE_FAULT; | ||
2384 | } | ||
2385 | |||
2386 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
2387 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | ||
2388 | write_segment_descriptor(ctxt, ops, old_tss_sel, | ||
2389 | &curr_tss_desc); | ||
2390 | } | ||
2391 | |||
2392 | if (reason == TASK_SWITCH_IRET) | ||
2393 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | ||
2394 | |||
2395 | /* set back link to prev task only if NT bit is set in eflags | ||
2396 | note that old_tss_sel is not used afetr this point */ | ||
2397 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
2398 | old_tss_sel = 0xffff; | ||
2399 | |||
2400 | if (next_tss_desc.type & 8) | ||
2401 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | ||
2402 | old_tss_base, &next_tss_desc); | ||
2403 | else | ||
2404 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | ||
2405 | old_tss_base, &next_tss_desc); | ||
2406 | if (ret != X86EMUL_CONTINUE) | ||
2407 | return ret; | ||
2408 | |||
2409 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) | ||
2410 | ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; | ||
2411 | |||
2412 | if (reason != TASK_SWITCH_IRET) { | ||
2413 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | ||
2414 | write_segment_descriptor(ctxt, ops, tss_selector, | ||
2415 | &next_tss_desc); | ||
2416 | } | ||
2417 | |||
2418 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | ||
2419 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | ||
2420 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
2421 | |||
2422 | if (has_error_code) { | ||
2423 | struct decode_cache *c = &ctxt->decode; | ||
2424 | |||
2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | ||
2426 | c->lock_prefix = 0; | ||
2427 | c->src.val = (unsigned long) error_code; | ||
2428 | emulate_push(ctxt); | ||
2429 | } | ||
2430 | |||
2431 | return ret; | ||
2432 | } | ||
2433 | |||
2434 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2435 | struct x86_emulate_ops *ops, | ||
2436 | u16 tss_selector, int reason, | ||
2437 | bool has_error_code, u32 error_code) | ||
2438 | { | ||
2439 | struct decode_cache *c = &ctxt->decode; | ||
2440 | int rc; | ||
2441 | |||
2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
2443 | c->eip = ctxt->eip; | ||
2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2445 | c->dst.type = OP_NONE; | ||
2446 | |||
2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | ||
2448 | has_error_code, error_code); | ||
2449 | |||
2450 | if (rc == X86EMUL_CONTINUE) { | ||
2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
2453 | rc = writeback(ctxt, ops); | ||
2454 | } | ||
2455 | |||
2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | ||
2457 | } | ||
2458 | |||
2459 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | ||
2460 | int reg, struct operand *op) | ||
2461 | { | ||
2462 | struct decode_cache *c = &ctxt->decode; | ||
2463 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | ||
2464 | |||
2465 | register_address_increment(c, &c->regs[reg], df * op->bytes); | ||
2466 | op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); | ||
2467 | } | ||
2468 | |||
1810 | int | 2469 | int |
1811 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 2470 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1812 | { | 2471 | { |
1813 | unsigned long memop = 0; | ||
1814 | u64 msr_data; | 2472 | u64 msr_data; |
1815 | unsigned long saved_eip = 0; | ||
1816 | struct decode_cache *c = &ctxt->decode; | 2473 | struct decode_cache *c = &ctxt->decode; |
1817 | unsigned int port; | 2474 | int rc = X86EMUL_CONTINUE; |
1818 | int io_dir_in; | 2475 | int saved_dst_type = c->dst.type; |
1819 | int rc = 0; | ||
1820 | 2476 | ||
1821 | ctxt->interruptibility = 0; | 2477 | ctxt->interruptibility = 0; |
1822 | 2478 | ||
@@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1826 | */ | 2482 | */ |
1827 | 2483 | ||
1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1829 | saved_eip = c->eip; | 2485 | |
2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
2488 | goto done; | ||
2489 | } | ||
1830 | 2490 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | 2491 | /* LOCK prefix is allowed only with some instructions */ |
1832 | if (c->lock_prefix && !(c->d & Lock)) { | 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1834 | goto done; | 2494 | goto done; |
1835 | } | 2495 | } |
1836 | 2496 | ||
1837 | /* Privileged instruction can be executed only in CPL=0 */ | 2497 | /* Privileged instruction can be executed only in CPL=0 */ |
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
1839 | kvm_inject_gp(ctxt->vcpu, 0); | 2499 | kvm_inject_gp(ctxt->vcpu, 0); |
1840 | goto done; | 2500 | goto done; |
1841 | } | 2501 | } |
1842 | 2502 | ||
1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | ||
1844 | memop = c->modrm_ea; | ||
1845 | |||
1846 | if (c->rep_prefix && (c->d & String)) { | 2503 | if (c->rep_prefix && (c->d & String)) { |
2504 | ctxt->restart = true; | ||
1847 | /* All REP prefixes have the same first termination condition */ | 2505 | /* All REP prefixes have the same first termination condition */ |
1848 | if (c->regs[VCPU_REGS_RCX] == 0) { | 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
2507 | string_done: | ||
2508 | ctxt->restart = false; | ||
1849 | kvm_rip_write(ctxt->vcpu, c->eip); | 2509 | kvm_rip_write(ctxt->vcpu, c->eip); |
1850 | goto done; | 2510 | goto done; |
1851 | } | 2511 | } |
@@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1857 | * - if REPNE/REPNZ and ZF = 1 then done | 2517 | * - if REPNE/REPNZ and ZF = 1 then done |
1858 | */ | 2518 | */ |
1859 | if ((c->b == 0xa6) || (c->b == 0xa7) || | 2519 | if ((c->b == 0xa6) || (c->b == 0xa7) || |
1860 | (c->b == 0xae) || (c->b == 0xaf)) { | 2520 | (c->b == 0xae) || (c->b == 0xaf)) { |
1861 | if ((c->rep_prefix == REPE_PREFIX) && | 2521 | if ((c->rep_prefix == REPE_PREFIX) && |
1862 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 2522 | ((ctxt->eflags & EFLG_ZF) == 0)) |
1863 | kvm_rip_write(ctxt->vcpu, c->eip); | 2523 | goto string_done; |
1864 | goto done; | ||
1865 | } | ||
1866 | if ((c->rep_prefix == REPNE_PREFIX) && | 2524 | if ((c->rep_prefix == REPNE_PREFIX) && |
1867 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 2525 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) |
1868 | kvm_rip_write(ctxt->vcpu, c->eip); | 2526 | goto string_done; |
1869 | goto done; | ||
1870 | } | ||
1871 | } | 2527 | } |
1872 | c->regs[VCPU_REGS_RCX]--; | 2528 | c->eip = ctxt->eip; |
1873 | c->eip = kvm_rip_read(ctxt->vcpu); | ||
1874 | } | 2529 | } |
1875 | 2530 | ||
1876 | if (c->src.type == OP_MEM) { | 2531 | if (c->src.type == OP_MEM) { |
1877 | c->src.ptr = (unsigned long *)memop; | ||
1878 | c->src.val = 0; | ||
1879 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
1880 | &c->src.val, | 2533 | &c->src.val, |
1881 | c->src.bytes, | 2534 | c->src.bytes, |
@@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1885 | c->src.orig_val = c->src.val; | 2538 | c->src.orig_val = c->src.val; |
1886 | } | 2539 | } |
1887 | 2540 | ||
2541 | if (c->src2.type == OP_MEM) { | ||
2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | ||
2543 | &c->src2.val, | ||
2544 | c->src2.bytes, | ||
2545 | ctxt->vcpu); | ||
2546 | if (rc != X86EMUL_CONTINUE) | ||
2547 | goto done; | ||
2548 | } | ||
2549 | |||
1888 | if ((c->d & DstMask) == ImplicitOps) | 2550 | if ((c->d & DstMask) == ImplicitOps) |
1889 | goto special_insn; | 2551 | goto special_insn; |
1890 | 2552 | ||
1891 | 2553 | ||
1892 | if (c->dst.type == OP_MEM) { | 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
1893 | c->dst.ptr = (unsigned long *)memop; | 2555 | /* optimisation - avoid slow emulated read if Mov */ |
1894 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, |
1895 | c->dst.val = 0; | 2557 | c->dst.bytes, ctxt->vcpu); |
1896 | if (c->d & BitOp) { | 2558 | if (rc != X86EMUL_CONTINUE) |
1897 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | 2559 | goto done; |
1898 | |||
1899 | c->dst.ptr = (void *)c->dst.ptr + | ||
1900 | (c->src.val & mask) / 8; | ||
1901 | } | ||
1902 | if (!(c->d & Mov)) { | ||
1903 | /* optimisation - avoid slow emulated read */ | ||
1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
1905 | &c->dst.val, | ||
1906 | c->dst.bytes, | ||
1907 | ctxt->vcpu); | ||
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1911 | } | 2560 | } |
1912 | c->dst.orig_val = c->dst.val; | 2561 | c->dst.orig_val = c->dst.val; |
1913 | 2562 | ||
@@ -1926,7 +2575,7 @@ special_insn: | |||
1926 | break; | 2575 | break; |
1927 | case 0x07: /* pop es */ | 2576 | case 0x07: /* pop es */ |
1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
1929 | if (rc != 0) | 2578 | if (rc != X86EMUL_CONTINUE) |
1930 | goto done; | 2579 | goto done; |
1931 | break; | 2580 | break; |
1932 | case 0x08 ... 0x0d: | 2581 | case 0x08 ... 0x0d: |
@@ -1945,7 +2594,7 @@ special_insn: | |||
1945 | break; | 2594 | break; |
1946 | case 0x17: /* pop ss */ | 2595 | case 0x17: /* pop ss */ |
1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
1948 | if (rc != 0) | 2597 | if (rc != X86EMUL_CONTINUE) |
1949 | goto done; | 2598 | goto done; |
1950 | break; | 2599 | break; |
1951 | case 0x18 ... 0x1d: | 2600 | case 0x18 ... 0x1d: |
@@ -1957,7 +2606,7 @@ special_insn: | |||
1957 | break; | 2606 | break; |
1958 | case 0x1f: /* pop ds */ | 2607 | case 0x1f: /* pop ds */ |
1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
1960 | if (rc != 0) | 2609 | if (rc != X86EMUL_CONTINUE) |
1961 | goto done; | 2610 | goto done; |
1962 | break; | 2611 | break; |
1963 | case 0x20 ... 0x25: | 2612 | case 0x20 ... 0x25: |
@@ -1988,7 +2637,7 @@ special_insn: | |||
1988 | case 0x58 ... 0x5f: /* pop reg */ | 2637 | case 0x58 ... 0x5f: /* pop reg */ |
1989 | pop_instruction: | 2638 | pop_instruction: |
1990 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | 2639 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); |
1991 | if (rc != 0) | 2640 | if (rc != X86EMUL_CONTINUE) |
1992 | goto done; | 2641 | goto done; |
1993 | break; | 2642 | break; |
1994 | case 0x60: /* pusha */ | 2643 | case 0x60: /* pusha */ |
@@ -1996,7 +2645,7 @@ special_insn: | |||
1996 | break; | 2645 | break; |
1997 | case 0x61: /* popa */ | 2646 | case 0x61: /* popa */ |
1998 | rc = emulate_popa(ctxt, ops); | 2647 | rc = emulate_popa(ctxt, ops); |
1999 | if (rc != 0) | 2648 | if (rc != X86EMUL_CONTINUE) |
2000 | goto done; | 2649 | goto done; |
2001 | break; | 2650 | break; |
2002 | case 0x63: /* movsxd */ | 2651 | case 0x63: /* movsxd */ |
@@ -2010,47 +2659,29 @@ special_insn: | |||
2010 | break; | 2659 | break; |
2011 | case 0x6c: /* insb */ | 2660 | case 0x6c: /* insb */ |
2012 | case 0x6d: /* insw/insd */ | 2661 | case 0x6d: /* insw/insd */ |
2662 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2664 | c->dst.bytes)) { |
2015 | kvm_inject_gp(ctxt->vcpu, 0); | 2665 | kvm_inject_gp(ctxt->vcpu, 0); |
2016 | goto done; | 2666 | goto done; |
2017 | } | 2667 | } |
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
2019 | 1, | 2669 | c->regs[VCPU_REGS_RDX], &c->dst.val)) |
2020 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2670 | goto done; /* IO is needed, skip writeback */ |
2021 | c->rep_prefix ? | 2671 | break; |
2022 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | ||
2023 | (ctxt->eflags & EFLG_DF), | ||
2024 | register_address(c, es_base(ctxt), | ||
2025 | c->regs[VCPU_REGS_RDI]), | ||
2026 | c->rep_prefix, | ||
2027 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2028 | c->eip = saved_eip; | ||
2029 | return -1; | ||
2030 | } | ||
2031 | return 0; | ||
2032 | case 0x6e: /* outsb */ | 2672 | case 0x6e: /* outsb */ |
2033 | case 0x6f: /* outsw/outsd */ | 2673 | case 0x6f: /* outsw/outsd */ |
2674 | c->src.bytes = min(c->src.bytes, 4u); | ||
2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2676 | c->src.bytes)) { |
2036 | kvm_inject_gp(ctxt->vcpu, 0); | 2677 | kvm_inject_gp(ctxt->vcpu, 0); |
2037 | goto done; | 2678 | goto done; |
2038 | } | 2679 | } |
2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
2040 | 0, | 2681 | &c->src.val, 1, ctxt->vcpu); |
2041 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2682 | |
2042 | c->rep_prefix ? | 2683 | c->dst.type = OP_NONE; /* nothing to writeback */ |
2043 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 2684 | break; |
2044 | (ctxt->eflags & EFLG_DF), | ||
2045 | register_address(c, | ||
2046 | seg_override_base(ctxt, c), | ||
2047 | c->regs[VCPU_REGS_RSI]), | ||
2048 | c->rep_prefix, | ||
2049 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2050 | c->eip = saved_eip; | ||
2051 | return -1; | ||
2052 | } | ||
2053 | return 0; | ||
2054 | case 0x70 ... 0x7f: /* jcc (short) */ | 2685 | case 0x70 ... 0x7f: /* jcc (short) */ |
2055 | if (test_cc(c->b, ctxt->eflags)) | 2686 | if (test_cc(c->b, ctxt->eflags)) |
2056 | jmp_rel(c, c->src.val); | 2687 | jmp_rel(c, c->src.val); |
@@ -2107,12 +2738,11 @@ special_insn: | |||
2107 | case 0x8c: { /* mov r/m, sreg */ | 2738 | case 0x8c: { /* mov r/m, sreg */ |
2108 | struct kvm_segment segreg; | 2739 | struct kvm_segment segreg; |
2109 | 2740 | ||
2110 | if (c->modrm_reg <= 5) | 2741 | if (c->modrm_reg <= VCPU_SREG_GS) |
2111 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); |
2112 | else { | 2743 | else { |
2113 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2114 | c->modrm); | 2745 | goto done; |
2115 | goto cannot_emulate; | ||
2116 | } | 2746 | } |
2117 | c->dst.val = segreg.selector; | 2747 | c->dst.val = segreg.selector; |
2118 | break; | 2748 | break; |
@@ -2132,16 +2762,16 @@ special_insn: | |||
2132 | } | 2762 | } |
2133 | 2763 | ||
2134 | if (c->modrm_reg == VCPU_SREG_SS) | 2764 | if (c->modrm_reg == VCPU_SREG_SS) |
2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); |
2136 | 2766 | ||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
2138 | 2768 | ||
2139 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2769 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2140 | break; | 2770 | break; |
2141 | } | 2771 | } |
2142 | case 0x8f: /* pop (sole member of Grp1a) */ | 2772 | case 0x8f: /* pop (sole member of Grp1a) */ |
2143 | rc = emulate_grp1a(ctxt, ops); | 2773 | rc = emulate_grp1a(ctxt, ops); |
2144 | if (rc != 0) | 2774 | if (rc != X86EMUL_CONTINUE) |
2145 | goto done; | 2775 | goto done; |
2146 | break; | 2776 | break; |
2147 | case 0x90: /* nop / xchg r8,rax */ | 2777 | case 0x90: /* nop / xchg r8,rax */ |
@@ -2175,89 +2805,16 @@ special_insn: | |||
2175 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; |
2176 | break; | 2806 | break; |
2177 | case 0xa4 ... 0xa5: /* movs */ | 2807 | case 0xa4 ... 0xa5: /* movs */ |
2178 | c->dst.type = OP_MEM; | 2808 | goto mov; |
2179 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2180 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2181 | es_base(ctxt), | ||
2182 | c->regs[VCPU_REGS_RDI]); | ||
2183 | rc = ops->read_emulated(register_address(c, | ||
2184 | seg_override_base(ctxt, c), | ||
2185 | c->regs[VCPU_REGS_RSI]), | ||
2186 | &c->dst.val, | ||
2187 | c->dst.bytes, ctxt->vcpu); | ||
2188 | if (rc != X86EMUL_CONTINUE) | ||
2189 | goto done; | ||
2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2192 | : c->dst.bytes); | ||
2193 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2194 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2195 | : c->dst.bytes); | ||
2196 | break; | ||
2197 | case 0xa6 ... 0xa7: /* cmps */ | 2809 | case 0xa6 ... 0xa7: /* cmps */ |
2198 | c->src.type = OP_NONE; /* Disable writeback. */ | ||
2199 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2200 | c->src.ptr = (unsigned long *)register_address(c, | ||
2201 | seg_override_base(ctxt, c), | ||
2202 | c->regs[VCPU_REGS_RSI]); | ||
2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, | ||
2204 | &c->src.val, | ||
2205 | c->src.bytes, | ||
2206 | ctxt->vcpu); | ||
2207 | if (rc != X86EMUL_CONTINUE) | ||
2208 | goto done; | ||
2209 | |||
2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2211 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2212 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2213 | es_base(ctxt), | ||
2214 | c->regs[VCPU_REGS_RDI]); | ||
2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
2216 | &c->dst.val, | ||
2217 | c->dst.bytes, | ||
2218 | ctxt->vcpu); | ||
2219 | if (rc != X86EMUL_CONTINUE) | ||
2220 | goto done; | ||
2221 | |||
2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
2223 | 2812 | goto cmp; | |
2224 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2225 | |||
2226 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2227 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | ||
2228 | : c->src.bytes); | ||
2229 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2230 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2231 | : c->dst.bytes); | ||
2232 | |||
2233 | break; | ||
2234 | case 0xaa ... 0xab: /* stos */ | 2813 | case 0xaa ... 0xab: /* stos */ |
2235 | c->dst.type = OP_MEM; | ||
2236 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2237 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2238 | es_base(ctxt), | ||
2239 | c->regs[VCPU_REGS_RDI]); | ||
2240 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
2241 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2242 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2243 | : c->dst.bytes); | ||
2244 | break; | 2815 | break; |
2245 | case 0xac ... 0xad: /* lods */ | 2816 | case 0xac ... 0xad: /* lods */ |
2246 | c->dst.type = OP_REG; | 2817 | goto mov; |
2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
2249 | rc = ops->read_emulated(register_address(c, | ||
2250 | seg_override_base(ctxt, c), | ||
2251 | c->regs[VCPU_REGS_RSI]), | ||
2252 | &c->dst.val, | ||
2253 | c->dst.bytes, | ||
2254 | ctxt->vcpu); | ||
2255 | if (rc != X86EMUL_CONTINUE) | ||
2256 | goto done; | ||
2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2259 | : c->dst.bytes); | ||
2260 | break; | ||
2261 | case 0xae ... 0xaf: /* scas */ | 2818 | case 0xae ... 0xaf: /* scas */ |
2262 | DPRINTF("Urk! I don't handle SCAS.\n"); | 2819 | DPRINTF("Urk! I don't handle SCAS.\n"); |
2263 | goto cannot_emulate; | 2820 | goto cannot_emulate; |
@@ -2277,7 +2834,7 @@ special_insn: | |||
2277 | break; | 2834 | break; |
2278 | case 0xcb: /* ret far */ | 2835 | case 0xcb: /* ret far */ |
2279 | rc = emulate_ret_far(ctxt, ops); | 2836 | rc = emulate_ret_far(ctxt, ops); |
2280 | if (rc) | 2837 | if (rc != X86EMUL_CONTINUE) |
2281 | goto done; | 2838 | goto done; |
2282 | break; | 2839 | break; |
2283 | case 0xd0 ... 0xd1: /* Grp2 */ | 2840 | case 0xd0 ... 0xd1: /* Grp2 */ |
@@ -2290,14 +2847,10 @@ special_insn: | |||
2290 | break; | 2847 | break; |
2291 | case 0xe4: /* inb */ | 2848 | case 0xe4: /* inb */ |
2292 | case 0xe5: /* in */ | 2849 | case 0xe5: /* in */ |
2293 | port = c->src.val; | 2850 | goto do_io_in; |
2294 | io_dir_in = 1; | ||
2295 | goto do_io; | ||
2296 | case 0xe6: /* outb */ | 2851 | case 0xe6: /* outb */ |
2297 | case 0xe7: /* out */ | 2852 | case 0xe7: /* out */ |
2298 | port = c->src.val; | 2853 | goto do_io_out; |
2299 | io_dir_in = 0; | ||
2300 | goto do_io; | ||
2301 | case 0xe8: /* call (near) */ { | 2854 | case 0xe8: /* call (near) */ { |
2302 | long int rel = c->src.val; | 2855 | long int rel = c->src.val; |
2303 | c->src.val = (unsigned long) c->eip; | 2856 | c->src.val = (unsigned long) c->eip; |
@@ -2308,8 +2861,9 @@ special_insn: | |||
2308 | case 0xe9: /* jmp rel */ | 2861 | case 0xe9: /* jmp rel */ |
2309 | goto jmp; | 2862 | goto jmp; |
2310 | case 0xea: /* jmp far */ | 2863 | case 0xea: /* jmp far */ |
2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, | 2864 | jump_far: |
2312 | VCPU_SREG_CS)) | 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, |
2866 | VCPU_SREG_CS)) | ||
2313 | goto done; | 2867 | goto done; |
2314 | 2868 | ||
2315 | c->eip = c->src.val; | 2869 | c->eip = c->src.val; |
@@ -2321,25 +2875,29 @@ special_insn: | |||
2321 | break; | 2875 | break; |
2322 | case 0xec: /* in al,dx */ | 2876 | case 0xec: /* in al,dx */ |
2323 | case 0xed: /* in (e/r)ax,dx */ | 2877 | case 0xed: /* in (e/r)ax,dx */ |
2324 | port = c->regs[VCPU_REGS_RDX]; | 2878 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2325 | io_dir_in = 1; | 2879 | do_io_in: |
2326 | goto do_io; | 2880 | c->dst.bytes = min(c->dst.bytes, 4u); |
2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
2882 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2883 | goto done; | ||
2884 | } | ||
2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | ||
2886 | &c->dst.val)) | ||
2887 | goto done; /* IO is needed */ | ||
2888 | break; | ||
2327 | case 0xee: /* out al,dx */ | 2889 | case 0xee: /* out al,dx */ |
2328 | case 0xef: /* out (e/r)ax,dx */ | 2890 | case 0xef: /* out (e/r)ax,dx */ |
2329 | port = c->regs[VCPU_REGS_RDX]; | 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2330 | io_dir_in = 0; | 2892 | do_io_out: |
2331 | do_io: | 2893 | c->dst.bytes = min(c->dst.bytes, 4u); |
2332 | if (!emulator_io_permited(ctxt, ops, port, | 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | 2895 | kvm_inject_gp(ctxt->vcpu, 0); |
2335 | goto done; | 2896 | goto done; |
2336 | } | 2897 | } |
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
2338 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2899 | ctxt->vcpu); |
2339 | port) != 0) { | 2900 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2340 | c->eip = saved_eip; | ||
2341 | goto cannot_emulate; | ||
2342 | } | ||
2343 | break; | 2901 | break; |
2344 | case 0xf4: /* hlt */ | 2902 | case 0xf4: /* hlt */ |
2345 | ctxt->vcpu->arch.halt_request = 1; | 2903 | ctxt->vcpu->arch.halt_request = 1; |
@@ -2350,16 +2908,15 @@ special_insn: | |||
2350 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2908 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2351 | break; | 2909 | break; |
2352 | case 0xf6 ... 0xf7: /* Grp3 */ | 2910 | case 0xf6 ... 0xf7: /* Grp3 */ |
2353 | rc = emulate_grp3(ctxt, ops); | 2911 | if (!emulate_grp3(ctxt, ops)) |
2354 | if (rc != 0) | 2912 | goto cannot_emulate; |
2355 | goto done; | ||
2356 | break; | 2913 | break; |
2357 | case 0xf8: /* clc */ | 2914 | case 0xf8: /* clc */ |
2358 | ctxt->eflags &= ~EFLG_CF; | 2915 | ctxt->eflags &= ~EFLG_CF; |
2359 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2360 | break; | 2917 | break; |
2361 | case 0xfa: /* cli */ | 2918 | case 0xfa: /* cli */ |
2362 | if (emulator_bad_iopl(ctxt)) | 2919 | if (emulator_bad_iopl(ctxt, ops)) |
2363 | kvm_inject_gp(ctxt->vcpu, 0); | 2920 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | 2921 | else { |
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; |
@@ -2367,10 +2924,10 @@ special_insn: | |||
2367 | } | 2924 | } |
2368 | break; | 2925 | break; |
2369 | case 0xfb: /* sti */ | 2926 | case 0xfb: /* sti */ |
2370 | if (emulator_bad_iopl(ctxt)) | 2927 | if (emulator_bad_iopl(ctxt, ops)) |
2371 | kvm_inject_gp(ctxt->vcpu, 0); | 2928 | kvm_inject_gp(ctxt->vcpu, 0); |
2372 | else { | 2929 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); |
2374 | ctxt->eflags |= X86_EFLAGS_IF; | 2931 | ctxt->eflags |= X86_EFLAGS_IF; |
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2376 | } | 2933 | } |
@@ -2383,28 +2940,55 @@ special_insn: | |||
2383 | ctxt->eflags |= EFLG_DF; | 2940 | ctxt->eflags |= EFLG_DF; |
2384 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2941 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2385 | break; | 2942 | break; |
2386 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 2943 | case 0xfe: /* Grp4 */ |
2944 | grp45: | ||
2387 | rc = emulate_grp45(ctxt, ops); | 2945 | rc = emulate_grp45(ctxt, ops); |
2388 | if (rc != 0) | 2946 | if (rc != X86EMUL_CONTINUE) |
2389 | goto done; | 2947 | goto done; |
2390 | break; | 2948 | break; |
2949 | case 0xff: /* Grp5 */ | ||
2950 | if (c->modrm_reg == 5) | ||
2951 | goto jump_far; | ||
2952 | goto grp45; | ||
2391 | } | 2953 | } |
2392 | 2954 | ||
2393 | writeback: | 2955 | writeback: |
2394 | rc = writeback(ctxt, ops); | 2956 | rc = writeback(ctxt, ops); |
2395 | if (rc != 0) | 2957 | if (rc != X86EMUL_CONTINUE) |
2396 | goto done; | 2958 | goto done; |
2397 | 2959 | ||
2960 | /* | ||
2961 | * restore dst type in case the decoding will be reused | ||
2962 | * (happens for string instruction ) | ||
2963 | */ | ||
2964 | c->dst.type = saved_dst_type; | ||
2965 | |||
2966 | if ((c->d & SrcMask) == SrcSI) | ||
2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | ||
2968 | &c->src); | ||
2969 | |||
2970 | if ((c->d & DstMask) == DstDI) | ||
2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | ||
2972 | |||
2973 | if (c->rep_prefix && (c->d & String)) { | ||
2974 | struct read_cache *rc = &ctxt->decode.io_read; | ||
2975 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
2976 | /* | ||
2977 | * Re-enter guest when pio read ahead buffer is empty or, | ||
2978 | * if it is not used, after each 1024 iteration. | ||
2979 | */ | ||
2980 | if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || | ||
2981 | (rc->end != 0 && rc->end == rc->pos)) | ||
2982 | ctxt->restart = false; | ||
2983 | } | ||
2984 | |||
2398 | /* Commit shadow register state. */ | 2985 | /* Commit shadow register state. */ |
2399 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
2400 | kvm_rip_write(ctxt->vcpu, c->eip); | 2987 | kvm_rip_write(ctxt->vcpu, c->eip); |
2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | ||
2401 | 2989 | ||
2402 | done: | 2990 | done: |
2403 | if (rc == X86EMUL_UNHANDLEABLE) { | 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
2404 | c->eip = saved_eip; | ||
2405 | return -1; | ||
2406 | } | ||
2407 | return 0; | ||
2408 | 2992 | ||
2409 | twobyte_insn: | 2993 | twobyte_insn: |
2410 | switch (c->b) { | 2994 | switch (c->b) { |
@@ -2418,18 +3002,18 @@ twobyte_insn: | |||
2418 | goto cannot_emulate; | 3002 | goto cannot_emulate; |
2419 | 3003 | ||
2420 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3004 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2421 | if (rc) | 3005 | if (rc != X86EMUL_CONTINUE) |
2422 | goto done; | 3006 | goto done; |
2423 | 3007 | ||
2424 | /* Let the processor re-execute the fixed hypercall */ | 3008 | /* Let the processor re-execute the fixed hypercall */ |
2425 | c->eip = kvm_rip_read(ctxt->vcpu); | 3009 | c->eip = ctxt->eip; |
2426 | /* Disable writeback. */ | 3010 | /* Disable writeback. */ |
2427 | c->dst.type = OP_NONE; | 3011 | c->dst.type = OP_NONE; |
2428 | break; | 3012 | break; |
2429 | case 2: /* lgdt */ | 3013 | case 2: /* lgdt */ |
2430 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3014 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2431 | &size, &address, c->op_bytes); | 3015 | &size, &address, c->op_bytes); |
2432 | if (rc) | 3016 | if (rc != X86EMUL_CONTINUE) |
2433 | goto done; | 3017 | goto done; |
2434 | realmode_lgdt(ctxt->vcpu, size, address); | 3018 | realmode_lgdt(ctxt->vcpu, size, address); |
2435 | /* Disable writeback. */ | 3019 | /* Disable writeback. */ |
@@ -2440,7 +3024,7 @@ twobyte_insn: | |||
2440 | switch (c->modrm_rm) { | 3024 | switch (c->modrm_rm) { |
2441 | case 1: | 3025 | case 1: |
2442 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3026 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2443 | if (rc) | 3027 | if (rc != X86EMUL_CONTINUE) |
2444 | goto done; | 3028 | goto done; |
2445 | break; | 3029 | break; |
2446 | default: | 3030 | default: |
@@ -2450,7 +3034,7 @@ twobyte_insn: | |||
2450 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3034 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2451 | &size, &address, | 3035 | &size, &address, |
2452 | c->op_bytes); | 3036 | c->op_bytes); |
2453 | if (rc) | 3037 | if (rc != X86EMUL_CONTINUE) |
2454 | goto done; | 3038 | goto done; |
2455 | realmode_lidt(ctxt->vcpu, size, address); | 3039 | realmode_lidt(ctxt->vcpu, size, address); |
2456 | } | 3040 | } |
@@ -2459,15 +3043,18 @@ twobyte_insn: | |||
2459 | break; | 3043 | break; |
2460 | case 4: /* smsw */ | 3044 | case 4: /* smsw */ |
2461 | c->dst.bytes = 2; | 3045 | c->dst.bytes = 2; |
2462 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); | 3046 | c->dst.val = ops->get_cr(0, ctxt->vcpu); |
2463 | break; | 3047 | break; |
2464 | case 6: /* lmsw */ | 3048 | case 6: /* lmsw */ |
2465 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 3049 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | |
2466 | &ctxt->eflags); | 3050 | (c->src.val & 0x0f), ctxt->vcpu); |
2467 | c->dst.type = OP_NONE; | 3051 | c->dst.type = OP_NONE; |
2468 | break; | 3052 | break; |
3053 | case 5: /* not defined */ | ||
3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3055 | goto done; | ||
2469 | case 7: /* invlpg*/ | 3056 | case 7: /* invlpg*/ |
2470 | emulate_invlpg(ctxt->vcpu, memop); | 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
2471 | /* Disable writeback. */ | 3058 | /* Disable writeback. */ |
2472 | c->dst.type = OP_NONE; | 3059 | c->dst.type = OP_NONE; |
2473 | break; | 3060 | break; |
@@ -2493,54 +3080,54 @@ twobyte_insn: | |||
2493 | c->dst.type = OP_NONE; | 3080 | c->dst.type = OP_NONE; |
2494 | break; | 3081 | break; |
2495 | case 0x20: /* mov cr, reg */ | 3082 | case 0x20: /* mov cr, reg */ |
2496 | if (c->modrm_mod != 3) | 3083 | switch (c->modrm_reg) { |
2497 | goto cannot_emulate; | 3084 | case 1: |
2498 | c->regs[c->modrm_rm] = | 3085 | case 5 ... 7: |
2499 | realmode_get_cr(ctxt->vcpu, c->modrm_reg); | 3086 | case 9 ... 15: |
3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3088 | goto done; | ||
3089 | } | ||
3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
2500 | c->dst.type = OP_NONE; /* no writeback */ | 3091 | c->dst.type = OP_NONE; /* no writeback */ |
2501 | break; | 3092 | break; |
2502 | case 0x21: /* mov from dr to reg */ | 3093 | case 0x21: /* mov from dr to reg */ |
2503 | if (c->modrm_mod != 3) | 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2504 | goto cannot_emulate; | 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2505 | rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2506 | if (rc) | 3097 | goto done; |
2507 | goto cannot_emulate; | 3098 | } |
3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | ||
2508 | c->dst.type = OP_NONE; /* no writeback */ | 3100 | c->dst.type = OP_NONE; /* no writeback */ |
2509 | break; | 3101 | break; |
2510 | case 0x22: /* mov reg, cr */ | 3102 | case 0x22: /* mov reg, cr */ |
2511 | if (c->modrm_mod != 3) | 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); |
2512 | goto cannot_emulate; | ||
2513 | realmode_set_cr(ctxt->vcpu, | ||
2514 | c->modrm_reg, c->modrm_val, &ctxt->eflags); | ||
2515 | c->dst.type = OP_NONE; | 3104 | c->dst.type = OP_NONE; |
2516 | break; | 3105 | break; |
2517 | case 0x23: /* mov from reg to dr */ | 3106 | case 0x23: /* mov from reg to dr */ |
2518 | if (c->modrm_mod != 3) | 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2519 | goto cannot_emulate; | 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2520 | rc = emulator_set_dr(ctxt, c->modrm_reg, | 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2521 | c->regs[c->modrm_rm]); | 3110 | goto done; |
2522 | if (rc) | 3111 | } |
2523 | goto cannot_emulate; | 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); |
2524 | c->dst.type = OP_NONE; /* no writeback */ | 3113 | c->dst.type = OP_NONE; /* no writeback */ |
2525 | break; | 3114 | break; |
2526 | case 0x30: | 3115 | case 0x30: |
2527 | /* wrmsr */ | 3116 | /* wrmsr */ |
2528 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
2529 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
2530 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
2531 | if (rc) { | ||
2532 | kvm_inject_gp(ctxt->vcpu, 0); | 3120 | kvm_inject_gp(ctxt->vcpu, 0); |
2533 | c->eip = kvm_rip_read(ctxt->vcpu); | 3121 | goto done; |
2534 | } | 3122 | } |
2535 | rc = X86EMUL_CONTINUE; | 3123 | rc = X86EMUL_CONTINUE; |
2536 | c->dst.type = OP_NONE; | 3124 | c->dst.type = OP_NONE; |
2537 | break; | 3125 | break; |
2538 | case 0x32: | 3126 | case 0x32: |
2539 | /* rdmsr */ | 3127 | /* rdmsr */ |
2540 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
2541 | if (rc) { | ||
2542 | kvm_inject_gp(ctxt->vcpu, 0); | 3129 | kvm_inject_gp(ctxt->vcpu, 0); |
2543 | c->eip = kvm_rip_read(ctxt->vcpu); | 3130 | goto done; |
2544 | } else { | 3131 | } else { |
2545 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
2546 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3133 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
@@ -2577,7 +3164,7 @@ twobyte_insn: | |||
2577 | break; | 3164 | break; |
2578 | case 0xa1: /* pop fs */ | 3165 | case 0xa1: /* pop fs */ |
2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
2580 | if (rc != 0) | 3167 | if (rc != X86EMUL_CONTINUE) |
2581 | goto done; | 3168 | goto done; |
2582 | break; | 3169 | break; |
2583 | case 0xa3: | 3170 | case 0xa3: |
@@ -2596,7 +3183,7 @@ twobyte_insn: | |||
2596 | break; | 3183 | break; |
2597 | case 0xa9: /* pop gs */ | 3184 | case 0xa9: /* pop gs */ |
2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
2599 | if (rc != 0) | 3186 | if (rc != X86EMUL_CONTINUE) |
2600 | goto done; | 3187 | goto done; |
2601 | break; | 3188 | break; |
2602 | case 0xab: | 3189 | case 0xab: |
@@ -2668,16 +3255,14 @@ twobyte_insn: | |||
2668 | (u64) c->src.val; | 3255 | (u64) c->src.val; |
2669 | break; | 3256 | break; |
2670 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 3257 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
2671 | rc = emulate_grp9(ctxt, ops, memop); | 3258 | rc = emulate_grp9(ctxt, ops); |
2672 | if (rc != 0) | 3259 | if (rc != X86EMUL_CONTINUE) |
2673 | goto done; | 3260 | goto done; |
2674 | c->dst.type = OP_NONE; | ||
2675 | break; | 3261 | break; |
2676 | } | 3262 | } |
2677 | goto writeback; | 3263 | goto writeback; |
2678 | 3264 | ||
2679 | cannot_emulate: | 3265 | cannot_emulate: |
2680 | DPRINTF("Cannot emulate %02x\n", c->b); | 3266 | DPRINTF("Cannot emulate %02x\n", c->b); |
2681 | c->eip = saved_eip; | ||
2682 | return -1; | 3267 | return -1; |
2683 | } | 3268 | } |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa128a9f..93825ff3338f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -33,6 +33,29 @@ | |||
33 | #include <linux/kvm_host.h> | 33 | #include <linux/kvm_host.h> |
34 | #include "trace.h" | 34 | #include "trace.h" |
35 | 35 | ||
36 | static void pic_lock(struct kvm_pic *s) | ||
37 | __acquires(&s->lock) | ||
38 | { | ||
39 | raw_spin_lock(&s->lock); | ||
40 | } | ||
41 | |||
42 | static void pic_unlock(struct kvm_pic *s) | ||
43 | __releases(&s->lock) | ||
44 | { | ||
45 | bool wakeup = s->wakeup_needed; | ||
46 | struct kvm_vcpu *vcpu; | ||
47 | |||
48 | s->wakeup_needed = false; | ||
49 | |||
50 | raw_spin_unlock(&s->lock); | ||
51 | |||
52 | if (wakeup) { | ||
53 | vcpu = s->kvm->bsp_vcpu; | ||
54 | if (vcpu) | ||
55 | kvm_vcpu_kick(vcpu); | ||
56 | } | ||
57 | } | ||
58 | |||
36 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 59 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
37 | { | 60 | { |
38 | s->isr &= ~(1 << irq); | 61 | s->isr &= ~(1 << irq); |
@@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
45 | * Other interrupt may be delivered to PIC while lock is dropped but | 68 | * Other interrupt may be delivered to PIC while lock is dropped but |
46 | * it should be safe since PIC state is already updated at this stage. | 69 | * it should be safe since PIC state is already updated at this stage. |
47 | */ | 70 | */ |
48 | raw_spin_unlock(&s->pics_state->lock); | 71 | pic_unlock(s->pics_state); |
49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 72 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
50 | raw_spin_lock(&s->pics_state->lock); | 73 | pic_lock(s->pics_state); |
51 | } | 74 | } |
52 | 75 | ||
53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 76 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
54 | { | 77 | { |
55 | struct kvm_pic *s = pic_irqchip(kvm); | 78 | struct kvm_pic *s = pic_irqchip(kvm); |
56 | 79 | ||
57 | raw_spin_lock(&s->lock); | 80 | pic_lock(s); |
58 | s->pics[0].isr_ack = 0xff; | 81 | s->pics[0].isr_ack = 0xff; |
59 | s->pics[1].isr_ack = 0xff; | 82 | s->pics[1].isr_ack = 0xff; |
60 | raw_spin_unlock(&s->lock); | 83 | pic_unlock(s); |
61 | } | 84 | } |
62 | 85 | ||
63 | /* | 86 | /* |
@@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
158 | 181 | ||
159 | void kvm_pic_update_irq(struct kvm_pic *s) | 182 | void kvm_pic_update_irq(struct kvm_pic *s) |
160 | { | 183 | { |
161 | raw_spin_lock(&s->lock); | 184 | pic_lock(s); |
162 | pic_update_irq(s); | 185 | pic_update_irq(s); |
163 | raw_spin_unlock(&s->lock); | 186 | pic_unlock(s); |
164 | } | 187 | } |
165 | 188 | ||
166 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 189 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
168 | struct kvm_pic *s = opaque; | 191 | struct kvm_pic *s = opaque; |
169 | int ret = -1; | 192 | int ret = -1; |
170 | 193 | ||
171 | raw_spin_lock(&s->lock); | 194 | pic_lock(s); |
172 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 195 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 196 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
174 | pic_update_irq(s); | 197 | pic_update_irq(s); |
175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 198 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
176 | s->pics[irq >> 3].imr, ret == 0); | 199 | s->pics[irq >> 3].imr, ret == 0); |
177 | } | 200 | } |
178 | raw_spin_unlock(&s->lock); | 201 | pic_unlock(s); |
179 | 202 | ||
180 | return ret; | 203 | return ret; |
181 | } | 204 | } |
@@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
205 | int irq, irq2, intno; | 228 | int irq, irq2, intno; |
206 | struct kvm_pic *s = pic_irqchip(kvm); | 229 | struct kvm_pic *s = pic_irqchip(kvm); |
207 | 230 | ||
208 | raw_spin_lock(&s->lock); | 231 | pic_lock(s); |
209 | irq = pic_get_irq(&s->pics[0]); | 232 | irq = pic_get_irq(&s->pics[0]); |
210 | if (irq >= 0) { | 233 | if (irq >= 0) { |
211 | pic_intack(&s->pics[0], irq); | 234 | pic_intack(&s->pics[0], irq); |
@@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
230 | intno = s->pics[0].irq_base + irq; | 253 | intno = s->pics[0].irq_base + irq; |
231 | } | 254 | } |
232 | pic_update_irq(s); | 255 | pic_update_irq(s); |
233 | raw_spin_unlock(&s->lock); | 256 | pic_unlock(s); |
234 | 257 | ||
235 | return intno; | 258 | return intno; |
236 | } | 259 | } |
@@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
444 | printk(KERN_ERR "PIC: non byte write\n"); | 467 | printk(KERN_ERR "PIC: non byte write\n"); |
445 | return 0; | 468 | return 0; |
446 | } | 469 | } |
447 | raw_spin_lock(&s->lock); | 470 | pic_lock(s); |
448 | switch (addr) { | 471 | switch (addr) { |
449 | case 0x20: | 472 | case 0x20: |
450 | case 0x21: | 473 | case 0x21: |
@@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 480 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
458 | break; | 481 | break; |
459 | } | 482 | } |
460 | raw_spin_unlock(&s->lock); | 483 | pic_unlock(s); |
461 | return 0; | 484 | return 0; |
462 | } | 485 | } |
463 | 486 | ||
@@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
474 | printk(KERN_ERR "PIC: non byte read\n"); | 497 | printk(KERN_ERR "PIC: non byte read\n"); |
475 | return 0; | 498 | return 0; |
476 | } | 499 | } |
477 | raw_spin_lock(&s->lock); | 500 | pic_lock(s); |
478 | switch (addr) { | 501 | switch (addr) { |
479 | case 0x20: | 502 | case 0x20: |
480 | case 0x21: | 503 | case 0x21: |
@@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
488 | break; | 511 | break; |
489 | } | 512 | } |
490 | *(unsigned char *)val = data; | 513 | *(unsigned char *)val = data; |
491 | raw_spin_unlock(&s->lock); | 514 | pic_unlock(s); |
492 | return 0; | 515 | return 0; |
493 | } | 516 | } |
494 | 517 | ||
@@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) | |||
505 | s->output = level; | 528 | s->output = level; |
506 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 529 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
507 | s->pics[0].isr_ack &= ~(1 << irq); | 530 | s->pics[0].isr_ack &= ~(1 << irq); |
508 | kvm_vcpu_kick(vcpu); | 531 | s->wakeup_needed = true; |
509 | } | 532 | } |
510 | } | 533 | } |
511 | 534 | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915754d..cd1f362f413d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -63,6 +63,7 @@ struct kvm_kpic_state { | |||
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | raw_spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | bool wakeup_needed; | ||
66 | unsigned pending_acks; | 67 | unsigned pending_acks; |
67 | struct kvm *kvm; | 68 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524dda54..64bc6ea78d90 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
@@ -10,9 +10,7 @@ struct kvm_timer { | |||
10 | }; | 10 | }; |
11 | 11 | ||
12 | struct kvm_timer_ops { | 12 | struct kvm_timer_ops { |
13 | bool (*is_periodic)(struct kvm_timer *); | 13 | bool (*is_periodic)(struct kvm_timer *); |
14 | }; | 14 | }; |
15 | 15 | ||
16 | |||
17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | 16 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); |
18 | |||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 48aeee8eefb0..ddfa8658fb6d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); | |||
148 | 148 | ||
149 | #include <trace/events/kvm.h> | 149 | #include <trace/events/kvm.h> |
150 | 150 | ||
151 | #undef TRACE_INCLUDE_FILE | ||
152 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
153 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
154 | 153 | ||
@@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
174 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
175 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
176 | 175 | ||
177 | 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | |
178 | struct kvm_unsync_walk { | ||
179 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
180 | }; | ||
181 | |||
182 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
183 | 177 | ||
184 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
185 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
@@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | |||
327 | page = alloc_page(GFP_KERNEL); | 321 | page = alloc_page(GFP_KERNEL); |
328 | if (!page) | 322 | if (!page) |
329 | return -ENOMEM; | 323 | return -ENOMEM; |
330 | set_page_private(page, 0); | ||
331 | cache->objects[cache->nobjs++] = page_address(page); | 324 | cache->objects[cache->nobjs++] = page_address(page); |
332 | } | 325 | } |
333 | return 0; | 326 | return 0; |
@@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
438 | int i; | 431 | int i; |
439 | 432 | ||
440 | gfn = unalias_gfn(kvm, gfn); | 433 | gfn = unalias_gfn(kvm, gfn); |
434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
441 | for (i = PT_DIRECTORY_LEVEL; | 435 | for (i = PT_DIRECTORY_LEVEL; |
442 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
443 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
444 | write_count = slot_largepage_idx(gfn, slot, i); | 437 | write_count = slot_largepage_idx(gfn, slot, i); |
445 | *write_count -= 1; | 438 | *write_count -= 1; |
446 | WARN_ON(*write_count < 0); | 439 | WARN_ON(*write_count < 0); |
@@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
654 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
655 | { | 648 | { |
656 | struct kvm_rmap_desc *desc; | 649 | struct kvm_rmap_desc *desc; |
657 | struct kvm_rmap_desc *prev_desc; | ||
658 | u64 *prev_spte; | 650 | u64 *prev_spte; |
659 | int i; | 651 | int i; |
660 | 652 | ||
@@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
666 | return NULL; | 658 | return NULL; |
667 | } | 659 | } |
668 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
669 | prev_desc = NULL; | ||
670 | prev_spte = NULL; | 661 | prev_spte = NULL; |
671 | while (desc) { | 662 | while (desc) { |
672 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 663 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { |
@@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
794 | int retval = 0; | 785 | int retval = 0; |
795 | struct kvm_memslots *slots; | 786 | struct kvm_memslots *slots; |
796 | 787 | ||
797 | slots = rcu_dereference(kvm->memslots); | 788 | slots = kvm_memslots(kvm); |
798 | 789 | ||
799 | for (i = 0; i < slots->nmemslots; i++) { | 790 | for (i = 0; i < slots->nmemslots; i++) { |
800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 791 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
@@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
925 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
926 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
927 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
928 | INIT_LIST_HEAD(&sp->oos_link); | ||
929 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
930 | sp->multimapped = 0; | 920 | sp->multimapped = 0; |
931 | sp->parent_pte = parent_pte; | 921 | sp->parent_pte = parent_pte; |
@@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
1009 | } | 999 | } |
1010 | 1000 | ||
1011 | 1001 | ||
1012 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1013 | mmu_parent_walk_fn fn) | ||
1014 | { | 1003 | { |
1015 | struct kvm_pte_chain *pte_chain; | 1004 | struct kvm_pte_chain *pte_chain; |
1016 | struct hlist_node *node; | 1005 | struct hlist_node *node; |
@@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1019 | 1008 | ||
1020 | if (!sp->multimapped && sp->parent_pte) { | 1009 | if (!sp->multimapped && sp->parent_pte) { |
1021 | parent_sp = page_header(__pa(sp->parent_pte)); | 1010 | parent_sp = page_header(__pa(sp->parent_pte)); |
1022 | fn(vcpu, parent_sp); | 1011 | fn(parent_sp); |
1023 | mmu_parent_walk(vcpu, parent_sp, fn); | 1012 | mmu_parent_walk(parent_sp, fn); |
1024 | return; | 1013 | return; |
1025 | } | 1014 | } |
1026 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
@@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1028 | if (!pte_chain->parent_ptes[i]) | 1017 | if (!pte_chain->parent_ptes[i]) |
1029 | break; | 1018 | break; |
1030 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); |
1031 | fn(vcpu, parent_sp); | 1020 | fn(parent_sp); |
1032 | mmu_parent_walk(vcpu, parent_sp, fn); | 1021 | mmu_parent_walk(parent_sp, fn); |
1033 | } | 1022 | } |
1034 | } | 1023 | } |
1035 | 1024 | ||
@@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
1066 | } | 1055 | } |
1067 | } | 1056 | } |
1068 | 1057 | ||
1069 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) |
1070 | { | 1059 | { |
1071 | kvm_mmu_update_parents_unsync(sp); | 1060 | kvm_mmu_update_parents_unsync(sp); |
1072 | return 1; | 1061 | return 1; |
1073 | } | 1062 | } |
1074 | 1063 | ||
1075 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
1076 | struct kvm_mmu_page *sp) | ||
1077 | { | 1065 | { |
1078 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | 1066 | mmu_parent_walk(sp, unsync_walk_fn); |
1079 | kvm_mmu_update_parents_unsync(sp); | 1067 | kvm_mmu_update_parents_unsync(sp); |
1080 | } | 1068 | } |
1081 | 1069 | ||
@@ -1209,7 +1197,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | |||
1209 | 1197 | ||
1210 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1198 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1211 | { | 1199 | { |
1212 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | 1200 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1213 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1201 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1214 | return 1; | 1202 | return 1; |
1215 | } | 1203 | } |
@@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1331 | role = vcpu->arch.mmu.base_role; | 1319 | role = vcpu->arch.mmu.base_role; |
1332 | role.level = level; | 1320 | role.level = level; |
1333 | role.direct = direct; | 1321 | role.direct = direct; |
1322 | if (role.direct) | ||
1323 | role.cr4_pae = 0; | ||
1334 | role.access = access; | 1324 | role.access = access; |
1335 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1336 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
@@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1351 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1352 | if (sp->unsync_children) { | 1342 | if (sp->unsync_children) { |
1353 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); |
1354 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1344 | kvm_mmu_mark_parents_unsync(sp); |
1355 | } | 1345 | } |
1356 | trace_kvm_mmu_get_page(sp, false); | 1346 | trace_kvm_mmu_get_page(sp, false); |
1357 | return sp; | 1347 | return sp; |
@@ -1490,8 +1480,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1490 | for_each_sp(pages, sp, parents, i) { | 1480 | for_each_sp(pages, sp, parents, i) { |
1491 | kvm_mmu_zap_page(kvm, sp); | 1481 | kvm_mmu_zap_page(kvm, sp); |
1492 | mmu_pages_clear_parents(&parents); | 1482 | mmu_pages_clear_parents(&parents); |
1483 | zapped++; | ||
1493 | } | 1484 | } |
1494 | zapped += pages.nr; | ||
1495 | kvm_mmu_pages_init(parent, &parents, &pages); | 1485 | kvm_mmu_pages_init(parent, &parents, &pages); |
1496 | } | 1486 | } |
1497 | 1487 | ||
@@ -1542,14 +1532,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1542 | */ | 1532 | */ |
1543 | 1533 | ||
1544 | if (used_pages > kvm_nr_mmu_pages) { | 1534 | if (used_pages > kvm_nr_mmu_pages) { |
1545 | while (used_pages > kvm_nr_mmu_pages) { | 1535 | while (used_pages > kvm_nr_mmu_pages && |
1536 | !list_empty(&kvm->arch.active_mmu_pages)) { | ||
1546 | struct kvm_mmu_page *page; | 1537 | struct kvm_mmu_page *page; |
1547 | 1538 | ||
1548 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1539 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1549 | struct kvm_mmu_page, link); | 1540 | struct kvm_mmu_page, link); |
1550 | kvm_mmu_zap_page(kvm, page); | 1541 | used_pages -= kvm_mmu_zap_page(kvm, page); |
1551 | used_pages--; | 1542 | used_pages--; |
1552 | } | 1543 | } |
1544 | kvm_nr_mmu_pages = used_pages; | ||
1553 | kvm->arch.n_free_mmu_pages = 0; | 1545 | kvm->arch.n_free_mmu_pages = 0; |
1554 | } | 1546 | } |
1555 | else | 1547 | else |
@@ -1571,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1571 | r = 0; | 1563 | r = 0; |
1572 | index = kvm_page_table_hashfn(gfn); | 1564 | index = kvm_page_table_hashfn(gfn); |
1573 | bucket = &kvm->arch.mmu_page_hash[index]; | 1565 | bucket = &kvm->arch.mmu_page_hash[index]; |
1566 | restart: | ||
1574 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
1575 | if (sp->gfn == gfn && !sp->role.direct) { | 1568 | if (sp->gfn == gfn && !sp->role.direct) { |
1576 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1577 | sp->role.word); | 1570 | sp->role.word); |
1578 | r = 1; | 1571 | r = 1; |
1579 | if (kvm_mmu_zap_page(kvm, sp)) | 1572 | if (kvm_mmu_zap_page(kvm, sp)) |
1580 | n = bucket->first; | 1573 | goto restart; |
1581 | } | 1574 | } |
1582 | return r; | 1575 | return r; |
1583 | } | 1576 | } |
@@ -1591,12 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1591 | 1584 | ||
1592 | index = kvm_page_table_hashfn(gfn); | 1585 | index = kvm_page_table_hashfn(gfn); |
1593 | bucket = &kvm->arch.mmu_page_hash[index]; | 1586 | bucket = &kvm->arch.mmu_page_hash[index]; |
1587 | restart: | ||
1594 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { |
1595 | if (sp->gfn == gfn && !sp->role.direct | 1589 | if (sp->gfn == gfn && !sp->role.direct |
1596 | && !sp->role.invalid) { | 1590 | && !sp->role.invalid) { |
1597 | pgprintk("%s: zap %lx %x\n", | 1591 | pgprintk("%s: zap %lx %x\n", |
1598 | __func__, gfn, sp->role.word); | 1592 | __func__, gfn, sp->role.word); |
1599 | kvm_mmu_zap_page(kvm, sp); | 1593 | if (kvm_mmu_zap_page(kvm, sp)) |
1594 | goto restart; | ||
1600 | } | 1595 | } |
1601 | } | 1596 | } |
1602 | } | 1597 | } |
@@ -1623,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) | |||
1623 | } | 1618 | } |
1624 | } | 1619 | } |
1625 | 1620 | ||
1626 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
1627 | { | ||
1628 | struct page *page; | ||
1629 | |||
1630 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | ||
1631 | |||
1632 | if (gpa == UNMAPPED_GVA) | ||
1633 | return NULL; | ||
1634 | |||
1635 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
1636 | |||
1637 | return page; | ||
1638 | } | ||
1639 | |||
1640 | /* | 1621 | /* |
1641 | * The function is based on mtrr_type_lookup() in | 1622 | * The function is based on mtrr_type_lookup() in |
1642 | * arch/x86/kernel/cpu/mtrr/generic.c | 1623 | * arch/x86/kernel/cpu/mtrr/generic.c |
@@ -1762,7 +1743,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1762 | ++vcpu->kvm->stat.mmu_unsync; | 1743 | ++vcpu->kvm->stat.mmu_unsync; |
1763 | sp->unsync = 1; | 1744 | sp->unsync = 1; |
1764 | 1745 | ||
1765 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1746 | kvm_mmu_mark_parents_unsync(sp); |
1766 | 1747 | ||
1767 | mmu_convert_notrap(sp); | 1748 | mmu_convert_notrap(sp); |
1768 | return 0; | 1749 | return 0; |
@@ -2296,13 +2277,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2296 | /* no rsvd bits for 2 level 4K page table entries */ | 2277 | /* no rsvd bits for 2 level 4K page table entries */ |
2297 | context->rsvd_bits_mask[0][1] = 0; | 2278 | context->rsvd_bits_mask[0][1] = 0; |
2298 | context->rsvd_bits_mask[0][0] = 0; | 2279 | context->rsvd_bits_mask[0][0] = 0; |
2280 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
2281 | |||
2282 | if (!is_pse(vcpu)) { | ||
2283 | context->rsvd_bits_mask[1][1] = 0; | ||
2284 | break; | ||
2285 | } | ||
2286 | |||
2299 | if (is_cpuid_PSE36()) | 2287 | if (is_cpuid_PSE36()) |
2300 | /* 36bits PSE 4MB page */ | 2288 | /* 36bits PSE 4MB page */ |
2301 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2289 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
2302 | else | 2290 | else |
2303 | /* 32 bits PSE 4MB page */ | 2291 | /* 32 bits PSE 4MB page */ |
2304 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2292 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2305 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | ||
2306 | break; | 2293 | break; |
2307 | case PT32E_ROOT_LEVEL: | 2294 | case PT32E_ROOT_LEVEL: |
2308 | context->rsvd_bits_mask[0][2] = | 2295 | context->rsvd_bits_mask[0][2] = |
@@ -2315,7 +2302,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2315 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2302 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2316 | rsvd_bits(maxphyaddr, 62) | | 2303 | rsvd_bits(maxphyaddr, 62) | |
2317 | rsvd_bits(13, 20); /* large page */ | 2304 | rsvd_bits(13, 20); /* large page */ |
2318 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2305 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2319 | break; | 2306 | break; |
2320 | case PT64_ROOT_LEVEL: | 2307 | case PT64_ROOT_LEVEL: |
2321 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2308 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
@@ -2333,7 +2320,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2333 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2320 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2334 | rsvd_bits(maxphyaddr, 51) | | 2321 | rsvd_bits(maxphyaddr, 51) | |
2335 | rsvd_bits(13, 20); /* large page */ | 2322 | rsvd_bits(13, 20); /* large page */ |
2336 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2323 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2337 | break; | 2324 | break; |
2338 | } | 2325 | } |
2339 | } | 2326 | } |
@@ -2435,7 +2422,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
2435 | else | 2422 | else |
2436 | r = paging32_init_context(vcpu); | 2423 | r = paging32_init_context(vcpu); |
2437 | 2424 | ||
2438 | vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; | 2425 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
2439 | 2426 | ||
2440 | return r; | 2427 | return r; |
2441 | } | 2428 | } |
@@ -2524,7 +2511,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2524 | } | 2511 | } |
2525 | 2512 | ||
2526 | ++vcpu->kvm->stat.mmu_pte_updated; | 2513 | ++vcpu->kvm->stat.mmu_pte_updated; |
2527 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 2514 | if (!sp->role.cr4_pae) |
2528 | paging32_update_pte(vcpu, sp, spte, new); | 2515 | paging32_update_pte(vcpu, sp, spte, new); |
2529 | else | 2516 | else |
2530 | paging64_update_pte(vcpu, sp, spte, new); | 2517 | paging64_update_pte(vcpu, sp, spte, new); |
@@ -2559,36 +2546,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
2559 | } | 2546 | } |
2560 | 2547 | ||
2561 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2548 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2562 | const u8 *new, int bytes) | 2549 | u64 gpte) |
2563 | { | 2550 | { |
2564 | gfn_t gfn; | 2551 | gfn_t gfn; |
2565 | int r; | ||
2566 | u64 gpte = 0; | ||
2567 | pfn_t pfn; | 2552 | pfn_t pfn; |
2568 | 2553 | ||
2569 | if (bytes != 4 && bytes != 8) | ||
2570 | return; | ||
2571 | |||
2572 | /* | ||
2573 | * Assume that the pte write on a page table of the same type | ||
2574 | * as the current vcpu paging mode. This is nearly always true | ||
2575 | * (might be false while changing modes). Note it is verified later | ||
2576 | * by update_pte(). | ||
2577 | */ | ||
2578 | if (is_pae(vcpu)) { | ||
2579 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2580 | if ((bytes == 4) && (gpa % 4 == 0)) { | ||
2581 | r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); | ||
2582 | if (r) | ||
2583 | return; | ||
2584 | memcpy((void *)&gpte + (gpa % 8), new, 4); | ||
2585 | } else if ((bytes == 8) && (gpa % 8 == 0)) { | ||
2586 | memcpy((void *)&gpte, new, 8); | ||
2587 | } | ||
2588 | } else { | ||
2589 | if ((bytes == 4) && (gpa % 4 == 0)) | ||
2590 | memcpy((void *)&gpte, new, 4); | ||
2591 | } | ||
2592 | if (!is_present_gpte(gpte)) | 2554 | if (!is_present_gpte(gpte)) |
2593 | return; | 2555 | return; |
2594 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2556 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
@@ -2637,10 +2599,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2637 | int flooded = 0; | 2599 | int flooded = 0; |
2638 | int npte; | 2600 | int npte; |
2639 | int r; | 2601 | int r; |
2602 | int invlpg_counter; | ||
2640 | 2603 | ||
2641 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2604 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2642 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 2605 | |
2606 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | ||
2607 | |||
2608 | /* | ||
2609 | * Assume that the pte write on a page table of the same type | ||
2610 | * as the current vcpu paging mode. This is nearly always true | ||
2611 | * (might be false while changing modes). Note it is verified later | ||
2612 | * by update_pte(). | ||
2613 | */ | ||
2614 | if ((is_pae(vcpu) && bytes == 4) || !new) { | ||
2615 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2616 | if (is_pae(vcpu)) { | ||
2617 | gpa &= ~(gpa_t)7; | ||
2618 | bytes = 8; | ||
2619 | } | ||
2620 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
2621 | if (r) | ||
2622 | gentry = 0; | ||
2623 | new = (const u8 *)&gentry; | ||
2624 | } | ||
2625 | |||
2626 | switch (bytes) { | ||
2627 | case 4: | ||
2628 | gentry = *(const u32 *)new; | ||
2629 | break; | ||
2630 | case 8: | ||
2631 | gentry = *(const u64 *)new; | ||
2632 | break; | ||
2633 | default: | ||
2634 | gentry = 0; | ||
2635 | break; | ||
2636 | } | ||
2637 | |||
2638 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | ||
2643 | spin_lock(&vcpu->kvm->mmu_lock); | 2639 | spin_lock(&vcpu->kvm->mmu_lock); |
2640 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
2641 | gentry = 0; | ||
2644 | kvm_mmu_access_page(vcpu, gfn); | 2642 | kvm_mmu_access_page(vcpu, gfn); |
2645 | kvm_mmu_free_some_pages(vcpu); | 2643 | kvm_mmu_free_some_pages(vcpu); |
2646 | ++vcpu->kvm->stat.mmu_pte_write; | 2644 | ++vcpu->kvm->stat.mmu_pte_write; |
@@ -2659,10 +2657,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2659 | } | 2657 | } |
2660 | index = kvm_page_table_hashfn(gfn); | 2658 | index = kvm_page_table_hashfn(gfn); |
2661 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2659 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
2660 | |||
2661 | restart: | ||
2662 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2662 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
2663 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | 2663 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) |
2664 | continue; | 2664 | continue; |
2665 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2665 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2666 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2666 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2667 | misaligned |= bytes < 4; | 2667 | misaligned |= bytes < 4; |
2668 | if (misaligned || flooded) { | 2668 | if (misaligned || flooded) { |
@@ -2679,14 +2679,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2679 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2679 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2680 | gpa, bytes, sp->role.word); | 2680 | gpa, bytes, sp->role.word); |
2681 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2681 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
2682 | n = bucket->first; | 2682 | goto restart; |
2683 | ++vcpu->kvm->stat.mmu_flooded; | 2683 | ++vcpu->kvm->stat.mmu_flooded; |
2684 | continue; | 2684 | continue; |
2685 | } | 2685 | } |
2686 | page_offset = offset; | 2686 | page_offset = offset; |
2687 | level = sp->role.level; | 2687 | level = sp->role.level; |
2688 | npte = 1; | 2688 | npte = 1; |
2689 | if (sp->role.glevels == PT32_ROOT_LEVEL) { | 2689 | if (!sp->role.cr4_pae) { |
2690 | page_offset <<= 1; /* 32->64 */ | 2690 | page_offset <<= 1; /* 32->64 */ |
2691 | /* | 2691 | /* |
2692 | * A 32-bit pde maps 4MB while the shadow pdes map | 2692 | * A 32-bit pde maps 4MB while the shadow pdes map |
@@ -2704,20 +2704,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2704 | continue; | 2704 | continue; |
2705 | } | 2705 | } |
2706 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2706 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2707 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
2708 | gentry = 0; | ||
2709 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
2710 | gpa & ~(u64)(pte_size - 1), | ||
2711 | &gentry, pte_size); | ||
2712 | new = (const void *)&gentry; | ||
2713 | if (r < 0) | ||
2714 | new = NULL; | ||
2715 | } | ||
2716 | while (npte--) { | 2707 | while (npte--) { |
2717 | entry = *spte; | 2708 | entry = *spte; |
2718 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2709 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2719 | if (new) | 2710 | if (gentry) |
2720 | mmu_pte_write_new_pte(vcpu, sp, spte, new); | 2711 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2721 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2712 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
2722 | ++spte; | 2713 | ++spte; |
2723 | } | 2714 | } |
@@ -2897,10 +2888,11 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
2897 | struct kvm_mmu_page *sp, *node; | 2888 | struct kvm_mmu_page *sp, *node; |
2898 | 2889 | ||
2899 | spin_lock(&kvm->mmu_lock); | 2890 | spin_lock(&kvm->mmu_lock); |
2891 | restart: | ||
2900 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2892 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
2901 | if (kvm_mmu_zap_page(kvm, sp)) | 2893 | if (kvm_mmu_zap_page(kvm, sp)) |
2902 | node = container_of(kvm->arch.active_mmu_pages.next, | 2894 | goto restart; |
2903 | struct kvm_mmu_page, link); | 2895 | |
2904 | spin_unlock(&kvm->mmu_lock); | 2896 | spin_unlock(&kvm->mmu_lock); |
2905 | 2897 | ||
2906 | kvm_flush_remote_tlbs(kvm); | 2898 | kvm_flush_remote_tlbs(kvm); |
@@ -3008,7 +3000,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3008 | unsigned int nr_pages = 0; | 3000 | unsigned int nr_pages = 0; |
3009 | struct kvm_memslots *slots; | 3001 | struct kvm_memslots *slots; |
3010 | 3002 | ||
3011 | slots = rcu_dereference(kvm->memslots); | 3003 | slots = kvm_memslots(kvm); |
3004 | |||
3012 | for (i = 0; i < slots->nmemslots; i++) | 3005 | for (i = 0; i < slots->nmemslots; i++) |
3013 | nr_pages += slots->memslots[i].npages; | 3006 | nr_pages += slots->memslots[i].npages; |
3014 | 3007 | ||
@@ -3171,8 +3164,7 @@ static gva_t canonicalize(gva_t gva) | |||
3171 | } | 3164 | } |
3172 | 3165 | ||
3173 | 3166 | ||
3174 | typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, | 3167 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
3175 | u64 *sptep); | ||
3176 | 3168 | ||
3177 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3169 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
3178 | inspect_spte_fn fn) | 3170 | inspect_spte_fn fn) |
@@ -3188,7 +3180,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
3188 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3180 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
3189 | __mmu_spte_walk(kvm, child, fn); | 3181 | __mmu_spte_walk(kvm, child, fn); |
3190 | } else | 3182 | } else |
3191 | fn(kvm, sp, &sp->spt[i]); | 3183 | fn(kvm, &sp->spt[i]); |
3192 | } | 3184 | } |
3193 | } | 3185 | } |
3194 | } | 3186 | } |
@@ -3279,11 +3271,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3279 | 3271 | ||
3280 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3272 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3281 | { | 3273 | { |
3274 | struct kvm *kvm = vcpu->kvm; | ||
3275 | struct kvm_memslots *slots; | ||
3282 | int nmaps = 0; | 3276 | int nmaps = 0; |
3283 | int i, j, k, idx; | 3277 | int i, j, k, idx; |
3284 | 3278 | ||
3285 | idx = srcu_read_lock(&kvm->srcu); | 3279 | idx = srcu_read_lock(&kvm->srcu); |
3286 | slots = rcu_dereference(kvm->memslots); | 3280 | slots = kvm_memslots(kvm); |
3287 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3281 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3288 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3282 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3289 | struct kvm_rmap_desc *d; | 3283 | struct kvm_rmap_desc *d; |
@@ -3312,7 +3306,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3312 | return nmaps; | 3306 | return nmaps; |
3313 | } | 3307 | } |
3314 | 3308 | ||
3315 | void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | 3309 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
3316 | { | 3310 | { |
3317 | unsigned long *rmapp; | 3311 | unsigned long *rmapp; |
3318 | struct kvm_mmu_page *rev_sp; | 3312 | struct kvm_mmu_page *rev_sp; |
@@ -3328,14 +3322,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | |||
3328 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3322 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
3329 | audit_msg, gfn); | 3323 | audit_msg, gfn); |
3330 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3324 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
3331 | audit_msg, sptep - rev_sp->spt, | 3325 | audit_msg, (long int)(sptep - rev_sp->spt), |
3332 | rev_sp->gfn); | 3326 | rev_sp->gfn); |
3333 | dump_stack(); | 3327 | dump_stack(); |
3334 | return; | 3328 | return; |
3335 | } | 3329 | } |
3336 | 3330 | ||
3337 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3331 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], |
3338 | is_large_pte(*sptep)); | 3332 | rev_sp->role.level); |
3339 | if (!*rmapp) { | 3333 | if (!*rmapp) { |
3340 | if (!printk_ratelimit()) | 3334 | if (!printk_ratelimit()) |
3341 | return; | 3335 | return; |
@@ -3370,7 +3364,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
3370 | continue; | 3364 | continue; |
3371 | if (!(ent & PT_WRITABLE_MASK)) | 3365 | if (!(ent & PT_WRITABLE_MASK)) |
3372 | continue; | 3366 | continue; |
3373 | inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); | 3367 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3374 | } | 3368 | } |
3375 | } | 3369 | } |
3376 | return; | 3370 | return; |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6ca2a9..bc4f7f0be2b1 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -6,8 +6,6 @@ | |||
6 | 6 | ||
7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
9 | #define TRACE_INCLUDE_PATH . | ||
10 | #define TRACE_INCLUDE_FILE mmutrace | ||
11 | 9 | ||
12 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
13 | __field(__u64, gfn) \ | 11 | __field(__u64, gfn) \ |
@@ -30,14 +28,14 @@ | |||
30 | \ | 28 | \ |
31 | role.word = __entry->role; \ | 29 | role.word = __entry->role; \ |
32 | \ | 30 | \ |
33 | trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ | 31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ |
34 | " %snxe root %u %s%c", \ | 32 | " %snxe root %u %s%c", \ |
35 | __entry->gfn, role.level, role.glevels, \ | 33 | __entry->gfn, role.level, \ |
34 | role.cr4_pae ? " pae" : "", \ | ||
36 | role.quadrant, \ | 35 | role.quadrant, \ |
37 | role.direct ? " direct" : "", \ | 36 | role.direct ? " direct" : "", \ |
38 | access_str[role.access], \ | 37 | access_str[role.access], \ |
39 | role.invalid ? " invalid" : "", \ | 38 | role.invalid ? " invalid" : "", \ |
40 | role.cr4_pge ? "" : "!", \ | ||
41 | role.nxe ? "" : "!", \ | 39 | role.nxe ? "" : "!", \ |
42 | __entry->root_count, \ | 40 | __entry->root_count, \ |
43 | __entry->unsync ? "unsync" : "sync", 0); \ | 41 | __entry->unsync ? "unsync" : "sync", 0); \ |
@@ -216,5 +214,10 @@ TRACE_EVENT( | |||
216 | 214 | ||
217 | #endif /* _TRACE_KVMMMU_H */ | 215 | #endif /* _TRACE_KVMMMU_H */ |
218 | 216 | ||
217 | #undef TRACE_INCLUDE_PATH | ||
218 | #define TRACE_INCLUDE_PATH . | ||
219 | #undef TRACE_INCLUDE_FILE | ||
220 | #define TRACE_INCLUDE_FILE mmutrace | ||
221 | |||
219 | /* This part must be outside protection */ | 222 | /* This part must be outside protection */ |
220 | #include <trace/define_trace.h> | 223 | #include <trace/define_trace.h> |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a50e6a..d0cc07eb6eda 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -170,7 +170,7 @@ walk: | |||
170 | goto access_error; | 170 | goto access_error; |
171 | 171 | ||
172 | #if PTTYPE == 64 | 172 | #if PTTYPE == 64 |
173 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) | 173 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 174 | goto access_error; |
175 | #endif | 175 | #endif |
176 | 176 | ||
@@ -190,10 +190,10 @@ walk: | |||
190 | 190 | ||
191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || |
192 | ((walker->level == PT_DIRECTORY_LEVEL) && | 192 | ((walker->level == PT_DIRECTORY_LEVEL) && |
193 | (pte & PT_PAGE_SIZE_MASK) && | 193 | is_large_pte(pte) && |
194 | (PTTYPE == 64 || is_pse(vcpu))) || | 194 | (PTTYPE == 64 || is_pse(vcpu))) || |
195 | ((walker->level == PT_PDPE_LEVEL) && | 195 | ((walker->level == PT_PDPE_LEVEL) && |
196 | (pte & PT_PAGE_SIZE_MASK) && | 196 | is_large_pte(pte) && |
197 | is_long_mode(vcpu))) { | 197 | is_long_mode(vcpu))) { |
198 | int lvl = walker->level; | 198 | int lvl = walker->level; |
199 | 199 | ||
@@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
258 | pt_element_t gpte; | 258 | pt_element_t gpte; |
259 | unsigned pte_access; | 259 | unsigned pte_access; |
260 | pfn_t pfn; | 260 | pfn_t pfn; |
261 | u64 new_spte; | ||
261 | 262 | ||
262 | gpte = *(const pt_element_t *)pte; | 263 | gpte = *(const pt_element_t *)pte; |
263 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
264 | if (!is_present_gpte(gpte)) | 265 | if (!is_present_gpte(gpte)) { |
265 | __set_spte(spte, shadow_notrap_nonpresent_pte); | 266 | if (page->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | ||
268 | else | ||
269 | new_spte = shadow_notrap_nonpresent_pte; | ||
270 | __set_spte(spte, new_spte); | ||
271 | } | ||
266 | return; | 272 | return; |
267 | } | 273 | } |
268 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
@@ -457,6 +463,7 @@ out_unlock: | |||
457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
458 | { | 464 | { |
459 | struct kvm_shadow_walk_iterator iterator; | 465 | struct kvm_shadow_walk_iterator iterator; |
466 | gpa_t pte_gpa = -1; | ||
460 | int level; | 467 | int level; |
461 | u64 *sptep; | 468 | u64 *sptep; |
462 | int need_flush = 0; | 469 | int need_flush = 0; |
@@ -470,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
470 | if (level == PT_PAGE_TABLE_LEVEL || | 477 | if (level == PT_PAGE_TABLE_LEVEL || |
471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 478 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || |
472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 479 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { |
480 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
481 | |||
482 | pte_gpa = (sp->gfn << PAGE_SHIFT); | ||
483 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
473 | 484 | ||
474 | if (is_shadow_present_pte(*sptep)) { | 485 | if (is_shadow_present_pte(*sptep)) { |
475 | rmap_remove(vcpu->kvm, sptep); | 486 | rmap_remove(vcpu->kvm, sptep); |
@@ -487,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
487 | 498 | ||
488 | if (need_flush) | 499 | if (need_flush) |
489 | kvm_flush_remote_tlbs(vcpu->kvm); | 500 | kvm_flush_remote_tlbs(vcpu->kvm); |
501 | |||
502 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
503 | |||
490 | spin_unlock(&vcpu->kvm->mmu_lock); | 504 | spin_unlock(&vcpu->kvm->mmu_lock); |
505 | |||
506 | if (pte_gpa == -1) | ||
507 | return; | ||
508 | |||
509 | if (mmu_topup_memory_caches(vcpu)) | ||
510 | return; | ||
511 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
491 | } | 512 | } |
492 | 513 | ||
493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 514 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
@@ -551,12 +572,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
551 | { | 572 | { |
552 | int i, offset, nr_present; | 573 | int i, offset, nr_present; |
553 | bool reset_host_protection; | 574 | bool reset_host_protection; |
575 | gpa_t first_pte_gpa; | ||
554 | 576 | ||
555 | offset = nr_present = 0; | 577 | offset = nr_present = 0; |
556 | 578 | ||
557 | if (PTTYPE == 32) | 579 | if (PTTYPE == 32) |
558 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 580 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
559 | 581 | ||
582 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
583 | |||
560 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 584 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
561 | unsigned pte_access; | 585 | unsigned pte_access; |
562 | pt_element_t gpte; | 586 | pt_element_t gpte; |
@@ -566,8 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
566 | if (!is_shadow_present_pte(sp->spt[i])) | 590 | if (!is_shadow_present_pte(sp->spt[i])) |
567 | continue; | 591 | continue; |
568 | 592 | ||
569 | pte_gpa = gfn_to_gpa(sp->gfn); | 593 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
570 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
571 | 594 | ||
572 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 595 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, |
573 | sizeof(pt_element_t))) | 596 | sizeof(pt_element_t))) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 445c59411ed0..ab78eb8ba899 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); | |||
44 | #define SEG_TYPE_LDT 2 | 44 | #define SEG_TYPE_LDT 2 |
45 | #define SEG_TYPE_BUSY_TSS16 3 | 45 | #define SEG_TYPE_BUSY_TSS16 3 |
46 | 46 | ||
47 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
48 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
49 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 50 | #define SVM_FEATURE_NRIP (1 << 3) |
51 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
51 | 52 | ||
52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 53 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 54 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -70,6 +71,7 @@ struct kvm_vcpu; | |||
70 | struct nested_state { | 71 | struct nested_state { |
71 | struct vmcb *hsave; | 72 | struct vmcb *hsave; |
72 | u64 hsave_msr; | 73 | u64 hsave_msr; |
74 | u64 vm_cr_msr; | ||
73 | u64 vmcb; | 75 | u64 vmcb; |
74 | 76 | ||
75 | /* These are the merged vectors */ | 77 | /* These are the merged vectors */ |
@@ -77,6 +79,7 @@ struct nested_state { | |||
77 | 79 | ||
78 | /* gpa pointers to the real vectors */ | 80 | /* gpa pointers to the real vectors */ |
79 | u64 vmcb_msrpm; | 81 | u64 vmcb_msrpm; |
82 | u64 vmcb_iopm; | ||
80 | 83 | ||
81 | /* A VMEXIT is required but not yet emulated */ | 84 | /* A VMEXIT is required but not yet emulated */ |
82 | bool exit_required; | 85 | bool exit_required; |
@@ -91,6 +94,9 @@ struct nested_state { | |||
91 | 94 | ||
92 | }; | 95 | }; |
93 | 96 | ||
97 | #define MSRPM_OFFSETS 16 | ||
98 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | ||
99 | |||
94 | struct vcpu_svm { | 100 | struct vcpu_svm { |
95 | struct kvm_vcpu vcpu; | 101 | struct kvm_vcpu vcpu; |
96 | struct vmcb *vmcb; | 102 | struct vmcb *vmcb; |
@@ -110,13 +116,39 @@ struct vcpu_svm { | |||
110 | struct nested_state nested; | 116 | struct nested_state nested; |
111 | 117 | ||
112 | bool nmi_singlestep; | 118 | bool nmi_singlestep; |
119 | |||
120 | unsigned int3_injected; | ||
121 | unsigned long int3_rip; | ||
122 | }; | ||
123 | |||
124 | #define MSR_INVALID 0xffffffffU | ||
125 | |||
126 | static struct svm_direct_access_msrs { | ||
127 | u32 index; /* Index of the MSR */ | ||
128 | bool always; /* True if intercept is always on */ | ||
129 | } direct_access_msrs[] = { | ||
130 | { .index = MSR_K6_STAR, .always = true }, | ||
131 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | ||
132 | #ifdef CONFIG_X86_64 | ||
133 | { .index = MSR_GS_BASE, .always = true }, | ||
134 | { .index = MSR_FS_BASE, .always = true }, | ||
135 | { .index = MSR_KERNEL_GS_BASE, .always = true }, | ||
136 | { .index = MSR_LSTAR, .always = true }, | ||
137 | { .index = MSR_CSTAR, .always = true }, | ||
138 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
139 | #endif | ||
140 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
141 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
142 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
143 | { .index = MSR_IA32_LASTINTTOIP, .always = false }, | ||
144 | { .index = MSR_INVALID, .always = false }, | ||
113 | }; | 145 | }; |
114 | 146 | ||
115 | /* enable NPT for AMD64 and X86 with PAE */ | 147 | /* enable NPT for AMD64 and X86 with PAE */ |
116 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 148 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
117 | static bool npt_enabled = true; | 149 | static bool npt_enabled = true; |
118 | #else | 150 | #else |
119 | static bool npt_enabled = false; | 151 | static bool npt_enabled; |
120 | #endif | 152 | #endif |
121 | static int npt = 1; | 153 | static int npt = 1; |
122 | 154 | ||
@@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); | |||
129 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 161 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
130 | 162 | ||
131 | static int nested_svm_exit_handled(struct vcpu_svm *svm); | 163 | static int nested_svm_exit_handled(struct vcpu_svm *svm); |
164 | static int nested_svm_intercept(struct vcpu_svm *svm); | ||
132 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 165 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
133 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 166 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
134 | bool has_error_code, u32 error_code); | 167 | bool has_error_code, u32 error_code); |
@@ -163,8 +196,8 @@ static unsigned long iopm_base; | |||
163 | struct kvm_ldttss_desc { | 196 | struct kvm_ldttss_desc { |
164 | u16 limit0; | 197 | u16 limit0; |
165 | u16 base0; | 198 | u16 base0; |
166 | unsigned base1 : 8, type : 5, dpl : 2, p : 1; | 199 | unsigned base1:8, type:5, dpl:2, p:1; |
167 | unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | 200 | unsigned limit1:4, zero0:3, g:1, base2:8; |
168 | u32 base3; | 201 | u32 base3; |
169 | u32 zero1; | 202 | u32 zero1; |
170 | } __attribute__((packed)); | 203 | } __attribute__((packed)); |
@@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
194 | #define MSRS_RANGE_SIZE 2048 | 227 | #define MSRS_RANGE_SIZE 2048 |
195 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) | 228 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) |
196 | 229 | ||
230 | static u32 svm_msrpm_offset(u32 msr) | ||
231 | { | ||
232 | u32 offset; | ||
233 | int i; | ||
234 | |||
235 | for (i = 0; i < NUM_MSR_MAPS; i++) { | ||
236 | if (msr < msrpm_ranges[i] || | ||
237 | msr >= msrpm_ranges[i] + MSRS_IN_RANGE) | ||
238 | continue; | ||
239 | |||
240 | offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ | ||
241 | offset += (i * MSRS_RANGE_SIZE); /* add range offset */ | ||
242 | |||
243 | /* Now we have the u8 offset - but need the u32 offset */ | ||
244 | return offset / 4; | ||
245 | } | ||
246 | |||
247 | /* MSR not in any range */ | ||
248 | return MSR_INVALID; | ||
249 | } | ||
250 | |||
197 | #define MAX_INST_SIZE 15 | 251 | #define MAX_INST_SIZE 15 |
198 | 252 | ||
199 | static inline u32 svm_has(u32 feat) | 253 | static inline u32 svm_has(u32 feat) |
@@ -213,7 +267,7 @@ static inline void stgi(void) | |||
213 | 267 | ||
214 | static inline void invlpga(unsigned long addr, u32 asid) | 268 | static inline void invlpga(unsigned long addr, u32 asid) |
215 | { | 269 | { |
216 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); | 270 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
217 | } | 271 | } |
218 | 272 | ||
219 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 273 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
@@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
235 | vcpu->arch.efer = efer; | 289 | vcpu->arch.efer = efer; |
236 | } | 290 | } |
237 | 291 | ||
238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
239 | bool has_error_code, u32 error_code) | ||
240 | { | ||
241 | struct vcpu_svm *svm = to_svm(vcpu); | ||
242 | |||
243 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
244 | guest handle the exception */ | ||
245 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
246 | return; | ||
247 | |||
248 | svm->vmcb->control.event_inj = nr | ||
249 | | SVM_EVTINJ_VALID | ||
250 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
251 | | SVM_EVTINJ_TYPE_EXEPT; | ||
252 | svm->vmcb->control.event_inj_err = error_code; | ||
253 | } | ||
254 | |||
255 | static int is_external_interrupt(u32 info) | 292 | static int is_external_interrupt(u32 info) |
256 | { | 293 | { |
257 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 294 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
@@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
264 | u32 ret = 0; | 301 | u32 ret = 0; |
265 | 302 | ||
266 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 303 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
267 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | 304 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
268 | return ret & mask; | 305 | return ret & mask; |
269 | } | 306 | } |
270 | 307 | ||
@@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
283 | { | 320 | { |
284 | struct vcpu_svm *svm = to_svm(vcpu); | 321 | struct vcpu_svm *svm = to_svm(vcpu); |
285 | 322 | ||
323 | if (svm->vmcb->control.next_rip != 0) | ||
324 | svm->next_rip = svm->vmcb->control.next_rip; | ||
325 | |||
286 | if (!svm->next_rip) { | 326 | if (!svm->next_rip) { |
287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 327 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
288 | EMULATE_DONE) | 328 | EMULATE_DONE) |
@@ -297,6 +337,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
297 | svm_set_interrupt_shadow(vcpu, 0); | 337 | svm_set_interrupt_shadow(vcpu, 0); |
298 | } | 338 | } |
299 | 339 | ||
340 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
341 | bool has_error_code, u32 error_code) | ||
342 | { | ||
343 | struct vcpu_svm *svm = to_svm(vcpu); | ||
344 | |||
345 | /* | ||
346 | * If we are within a nested VM we'd better #VMEXIT and let the guest | ||
347 | * handle the exception | ||
348 | */ | ||
349 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
350 | return; | ||
351 | |||
352 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | ||
353 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | ||
354 | |||
355 | /* | ||
356 | * For guest debugging where we have to reinject #BP if some | ||
357 | * INT3 is guest-owned: | ||
358 | * Emulate nRIP by moving RIP forward. Will fail if injection | ||
359 | * raises a fault that is not intercepted. Still better than | ||
360 | * failing in all cases. | ||
361 | */ | ||
362 | skip_emulated_instruction(&svm->vcpu); | ||
363 | rip = kvm_rip_read(&svm->vcpu); | ||
364 | svm->int3_rip = rip + svm->vmcb->save.cs.base; | ||
365 | svm->int3_injected = rip - old_rip; | ||
366 | } | ||
367 | |||
368 | svm->vmcb->control.event_inj = nr | ||
369 | | SVM_EVTINJ_VALID | ||
370 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
371 | | SVM_EVTINJ_TYPE_EXEPT; | ||
372 | svm->vmcb->control.event_inj_err = error_code; | ||
373 | } | ||
374 | |||
300 | static int has_svm(void) | 375 | static int has_svm(void) |
301 | { | 376 | { |
302 | const char *msg; | 377 | const char *msg; |
@@ -319,7 +394,7 @@ static int svm_hardware_enable(void *garbage) | |||
319 | 394 | ||
320 | struct svm_cpu_data *sd; | 395 | struct svm_cpu_data *sd; |
321 | uint64_t efer; | 396 | uint64_t efer; |
322 | struct descriptor_table gdt_descr; | 397 | struct desc_ptr gdt_descr; |
323 | struct desc_struct *gdt; | 398 | struct desc_struct *gdt; |
324 | int me = raw_smp_processor_id(); | 399 | int me = raw_smp_processor_id(); |
325 | 400 | ||
@@ -344,8 +419,8 @@ static int svm_hardware_enable(void *garbage) | |||
344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 419 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
345 | sd->next_asid = sd->max_asid + 1; | 420 | sd->next_asid = sd->max_asid + 1; |
346 | 421 | ||
347 | kvm_get_gdt(&gdt_descr); | 422 | native_store_gdt(&gdt_descr); |
348 | gdt = (struct desc_struct *)gdt_descr.base; | 423 | gdt = (struct desc_struct *)gdt_descr.address; |
349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 424 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
350 | 425 | ||
351 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 426 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
@@ -391,42 +466,98 @@ err_1: | |||
391 | 466 | ||
392 | } | 467 | } |
393 | 468 | ||
469 | static bool valid_msr_intercept(u32 index) | ||
470 | { | ||
471 | int i; | ||
472 | |||
473 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) | ||
474 | if (direct_access_msrs[i].index == index) | ||
475 | return true; | ||
476 | |||
477 | return false; | ||
478 | } | ||
479 | |||
394 | static void set_msr_interception(u32 *msrpm, unsigned msr, | 480 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
395 | int read, int write) | 481 | int read, int write) |
396 | { | 482 | { |
483 | u8 bit_read, bit_write; | ||
484 | unsigned long tmp; | ||
485 | u32 offset; | ||
486 | |||
487 | /* | ||
488 | * If this warning triggers extend the direct_access_msrs list at the | ||
489 | * beginning of the file | ||
490 | */ | ||
491 | WARN_ON(!valid_msr_intercept(msr)); | ||
492 | |||
493 | offset = svm_msrpm_offset(msr); | ||
494 | bit_read = 2 * (msr & 0x0f); | ||
495 | bit_write = 2 * (msr & 0x0f) + 1; | ||
496 | tmp = msrpm[offset]; | ||
497 | |||
498 | BUG_ON(offset == MSR_INVALID); | ||
499 | |||
500 | read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); | ||
501 | write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); | ||
502 | |||
503 | msrpm[offset] = tmp; | ||
504 | } | ||
505 | |||
506 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
507 | { | ||
397 | int i; | 508 | int i; |
398 | 509 | ||
399 | for (i = 0; i < NUM_MSR_MAPS; i++) { | 510 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); |
400 | if (msr >= msrpm_ranges[i] && | 511 | |
401 | msr < msrpm_ranges[i] + MSRS_IN_RANGE) { | 512 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
402 | u32 msr_offset = (i * MSRS_IN_RANGE + msr - | 513 | if (!direct_access_msrs[i].always) |
403 | msrpm_ranges[i]) * 2; | 514 | continue; |
404 | 515 | ||
405 | u32 *base = msrpm + (msr_offset / 32); | 516 | set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); |
406 | u32 msr_shift = msr_offset % 32; | 517 | } |
407 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 518 | } |
408 | *base = (*base & ~(0x3 << msr_shift)) | | 519 | |
409 | (mask << msr_shift); | 520 | static void add_msr_offset(u32 offset) |
521 | { | ||
522 | int i; | ||
523 | |||
524 | for (i = 0; i < MSRPM_OFFSETS; ++i) { | ||
525 | |||
526 | /* Offset already in list? */ | ||
527 | if (msrpm_offsets[i] == offset) | ||
410 | return; | 528 | return; |
411 | } | 529 | |
530 | /* Slot used by another offset? */ | ||
531 | if (msrpm_offsets[i] != MSR_INVALID) | ||
532 | continue; | ||
533 | |||
534 | /* Add offset to list */ | ||
535 | msrpm_offsets[i] = offset; | ||
536 | |||
537 | return; | ||
412 | } | 538 | } |
539 | |||
540 | /* | ||
541 | * If this BUG triggers the msrpm_offsets table has an overflow. Just | ||
542 | * increase MSRPM_OFFSETS in this case. | ||
543 | */ | ||
413 | BUG(); | 544 | BUG(); |
414 | } | 545 | } |
415 | 546 | ||
416 | static void svm_vcpu_init_msrpm(u32 *msrpm) | 547 | static void init_msrpm_offsets(void) |
417 | { | 548 | { |
418 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 549 | int i; |
419 | 550 | ||
420 | #ifdef CONFIG_X86_64 | 551 | memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); |
421 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | 552 | |
422 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | 553 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
423 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | 554 | u32 offset; |
424 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | 555 | |
425 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | 556 | offset = svm_msrpm_offset(direct_access_msrs[i].index); |
426 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | 557 | BUG_ON(offset == MSR_INVALID); |
427 | #endif | 558 | |
428 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | 559 | add_msr_offset(offset); |
429 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | 560 | } |
430 | } | 561 | } |
431 | 562 | ||
432 | static void svm_enable_lbrv(struct vcpu_svm *svm) | 563 | static void svm_enable_lbrv(struct vcpu_svm *svm) |
@@ -467,6 +598,8 @@ static __init int svm_hardware_setup(void) | |||
467 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | 598 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); |
468 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 599 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
469 | 600 | ||
601 | init_msrpm_offsets(); | ||
602 | |||
470 | if (boot_cpu_has(X86_FEATURE_NX)) | 603 | if (boot_cpu_has(X86_FEATURE_NX)) |
471 | kvm_enable_efer_bits(EFER_NX); | 604 | kvm_enable_efer_bits(EFER_NX); |
472 | 605 | ||
@@ -523,7 +656,7 @@ static void init_seg(struct vmcb_seg *seg) | |||
523 | { | 656 | { |
524 | seg->selector = 0; | 657 | seg->selector = 0; |
525 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | | 658 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | |
526 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ | 659 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ |
527 | seg->limit = 0xffff; | 660 | seg->limit = 0xffff; |
528 | seg->base = 0; | 661 | seg->base = 0; |
529 | } | 662 | } |
@@ -543,16 +676,16 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
543 | 676 | ||
544 | svm->vcpu.fpu_active = 1; | 677 | svm->vcpu.fpu_active = 1; |
545 | 678 | ||
546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 679 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
547 | INTERCEPT_CR3_MASK | | 680 | INTERCEPT_CR3_MASK | |
548 | INTERCEPT_CR4_MASK; | 681 | INTERCEPT_CR4_MASK; |
549 | 682 | ||
550 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 683 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
551 | INTERCEPT_CR3_MASK | | 684 | INTERCEPT_CR3_MASK | |
552 | INTERCEPT_CR4_MASK | | 685 | INTERCEPT_CR4_MASK | |
553 | INTERCEPT_CR8_MASK; | 686 | INTERCEPT_CR8_MASK; |
554 | 687 | ||
555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 688 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
556 | INTERCEPT_DR1_MASK | | 689 | INTERCEPT_DR1_MASK | |
557 | INTERCEPT_DR2_MASK | | 690 | INTERCEPT_DR2_MASK | |
558 | INTERCEPT_DR3_MASK | | 691 | INTERCEPT_DR3_MASK | |
@@ -561,7 +694,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
561 | INTERCEPT_DR6_MASK | | 694 | INTERCEPT_DR6_MASK | |
562 | INTERCEPT_DR7_MASK; | 695 | INTERCEPT_DR7_MASK; |
563 | 696 | ||
564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 697 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
565 | INTERCEPT_DR1_MASK | | 698 | INTERCEPT_DR1_MASK | |
566 | INTERCEPT_DR2_MASK | | 699 | INTERCEPT_DR2_MASK | |
567 | INTERCEPT_DR3_MASK | | 700 | INTERCEPT_DR3_MASK | |
@@ -575,7 +708,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
575 | (1 << MC_VECTOR); | 708 | (1 << MC_VECTOR); |
576 | 709 | ||
577 | 710 | ||
578 | control->intercept = (1ULL << INTERCEPT_INTR) | | 711 | control->intercept = (1ULL << INTERCEPT_INTR) | |
579 | (1ULL << INTERCEPT_NMI) | | 712 | (1ULL << INTERCEPT_NMI) | |
580 | (1ULL << INTERCEPT_SMI) | | 713 | (1ULL << INTERCEPT_SMI) | |
581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 714 | (1ULL << INTERCEPT_SELECTIVE_CR0) | |
@@ -636,7 +769,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
636 | save->rip = 0x0000fff0; | 769 | save->rip = 0x0000fff0; |
637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 770 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
638 | 771 | ||
639 | /* This is the guest-visible cr0 value. | 772 | /* |
773 | * This is the guest-visible cr0 value. | ||
640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 774 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
641 | */ | 775 | */ |
642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 776 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
@@ -706,30 +840,30 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
706 | if (err) | 840 | if (err) |
707 | goto free_svm; | 841 | goto free_svm; |
708 | 842 | ||
843 | err = -ENOMEM; | ||
709 | page = alloc_page(GFP_KERNEL); | 844 | page = alloc_page(GFP_KERNEL); |
710 | if (!page) { | 845 | if (!page) |
711 | err = -ENOMEM; | ||
712 | goto uninit; | 846 | goto uninit; |
713 | } | ||
714 | 847 | ||
715 | err = -ENOMEM; | ||
716 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 848 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); |
717 | if (!msrpm_pages) | 849 | if (!msrpm_pages) |
718 | goto uninit; | 850 | goto free_page1; |
719 | 851 | ||
720 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 852 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); |
721 | if (!nested_msrpm_pages) | 853 | if (!nested_msrpm_pages) |
722 | goto uninit; | 854 | goto free_page2; |
723 | |||
724 | svm->msrpm = page_address(msrpm_pages); | ||
725 | svm_vcpu_init_msrpm(svm->msrpm); | ||
726 | 855 | ||
727 | hsave_page = alloc_page(GFP_KERNEL); | 856 | hsave_page = alloc_page(GFP_KERNEL); |
728 | if (!hsave_page) | 857 | if (!hsave_page) |
729 | goto uninit; | 858 | goto free_page3; |
859 | |||
730 | svm->nested.hsave = page_address(hsave_page); | 860 | svm->nested.hsave = page_address(hsave_page); |
731 | 861 | ||
862 | svm->msrpm = page_address(msrpm_pages); | ||
863 | svm_vcpu_init_msrpm(svm->msrpm); | ||
864 | |||
732 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 865 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
866 | svm_vcpu_init_msrpm(svm->nested.msrpm); | ||
733 | 867 | ||
734 | svm->vmcb = page_address(page); | 868 | svm->vmcb = page_address(page); |
735 | clear_page(svm->vmcb); | 869 | clear_page(svm->vmcb); |
@@ -744,6 +878,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
744 | 878 | ||
745 | return &svm->vcpu; | 879 | return &svm->vcpu; |
746 | 880 | ||
881 | free_page3: | ||
882 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | ||
883 | free_page2: | ||
884 | __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); | ||
885 | free_page1: | ||
886 | __free_page(page); | ||
747 | uninit: | 887 | uninit: |
748 | kvm_vcpu_uninit(&svm->vcpu); | 888 | kvm_vcpu_uninit(&svm->vcpu); |
749 | free_svm: | 889 | free_svm: |
@@ -877,7 +1017,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
877 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1017 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
878 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1018 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
879 | 1019 | ||
880 | /* AMD's VMCB does not have an explicit unusable field, so emulate it | 1020 | /* |
1021 | * AMD's VMCB does not have an explicit unusable field, so emulate it | ||
881 | * for cross vendor migration purposes by "not present" | 1022 | * for cross vendor migration purposes by "not present" |
882 | */ | 1023 | */ |
883 | var->unusable = !var->present || (var->type == 0); | 1024 | var->unusable = !var->present || (var->type == 0); |
@@ -913,7 +1054,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
913 | var->type |= 0x1; | 1054 | var->type |= 0x1; |
914 | break; | 1055 | break; |
915 | case VCPU_SREG_SS: | 1056 | case VCPU_SREG_SS: |
916 | /* On AMD CPUs sometimes the DB bit in the segment | 1057 | /* |
1058 | * On AMD CPUs sometimes the DB bit in the segment | ||
917 | * descriptor is left as 1, although the whole segment has | 1059 | * descriptor is left as 1, although the whole segment has |
918 | * been made unusable. Clear it here to pass an Intel VMX | 1060 | * been made unusable. Clear it here to pass an Intel VMX |
919 | * entry check when cross vendor migrating. | 1061 | * entry check when cross vendor migrating. |
@@ -931,36 +1073,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) | |||
931 | return save->cpl; | 1073 | return save->cpl; |
932 | } | 1074 | } |
933 | 1075 | ||
934 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1076 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
935 | { | 1077 | { |
936 | struct vcpu_svm *svm = to_svm(vcpu); | 1078 | struct vcpu_svm *svm = to_svm(vcpu); |
937 | 1079 | ||
938 | dt->limit = svm->vmcb->save.idtr.limit; | 1080 | dt->size = svm->vmcb->save.idtr.limit; |
939 | dt->base = svm->vmcb->save.idtr.base; | 1081 | dt->address = svm->vmcb->save.idtr.base; |
940 | } | 1082 | } |
941 | 1083 | ||
942 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1084 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
943 | { | 1085 | { |
944 | struct vcpu_svm *svm = to_svm(vcpu); | 1086 | struct vcpu_svm *svm = to_svm(vcpu); |
945 | 1087 | ||
946 | svm->vmcb->save.idtr.limit = dt->limit; | 1088 | svm->vmcb->save.idtr.limit = dt->size; |
947 | svm->vmcb->save.idtr.base = dt->base ; | 1089 | svm->vmcb->save.idtr.base = dt->address ; |
948 | } | 1090 | } |
949 | 1091 | ||
950 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1092 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
951 | { | 1093 | { |
952 | struct vcpu_svm *svm = to_svm(vcpu); | 1094 | struct vcpu_svm *svm = to_svm(vcpu); |
953 | 1095 | ||
954 | dt->limit = svm->vmcb->save.gdtr.limit; | 1096 | dt->size = svm->vmcb->save.gdtr.limit; |
955 | dt->base = svm->vmcb->save.gdtr.base; | 1097 | dt->address = svm->vmcb->save.gdtr.base; |
956 | } | 1098 | } |
957 | 1099 | ||
958 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1100 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
959 | { | 1101 | { |
960 | struct vcpu_svm *svm = to_svm(vcpu); | 1102 | struct vcpu_svm *svm = to_svm(vcpu); |
961 | 1103 | ||
962 | svm->vmcb->save.gdtr.limit = dt->limit; | 1104 | svm->vmcb->save.gdtr.limit = dt->size; |
963 | svm->vmcb->save.gdtr.base = dt->base ; | 1105 | svm->vmcb->save.gdtr.base = dt->address ; |
964 | } | 1106 | } |
965 | 1107 | ||
966 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1108 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
@@ -973,6 +1115,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
973 | 1115 | ||
974 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1116 | static void update_cr0_intercept(struct vcpu_svm *svm) |
975 | { | 1117 | { |
1118 | struct vmcb *vmcb = svm->vmcb; | ||
976 | ulong gcr0 = svm->vcpu.arch.cr0; | 1119 | ulong gcr0 = svm->vcpu.arch.cr0; |
977 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1120 | u64 *hcr0 = &svm->vmcb->save.cr0; |
978 | 1121 | ||
@@ -984,11 +1127,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
984 | 1127 | ||
985 | 1128 | ||
986 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1129 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
987 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1130 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; |
988 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1131 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; |
1132 | if (is_nested(svm)) { | ||
1133 | struct vmcb *hsave = svm->nested.hsave; | ||
1134 | |||
1135 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
1136 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
1137 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
1138 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
1139 | } | ||
989 | } else { | 1140 | } else { |
990 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1141 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; |
991 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1142 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; |
1143 | if (is_nested(svm)) { | ||
1144 | struct vmcb *hsave = svm->nested.hsave; | ||
1145 | |||
1146 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
1147 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
1148 | } | ||
992 | } | 1149 | } |
993 | } | 1150 | } |
994 | 1151 | ||
@@ -996,6 +1153,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
996 | { | 1153 | { |
997 | struct vcpu_svm *svm = to_svm(vcpu); | 1154 | struct vcpu_svm *svm = to_svm(vcpu); |
998 | 1155 | ||
1156 | if (is_nested(svm)) { | ||
1157 | /* | ||
1158 | * We are here because we run in nested mode, the host kvm | ||
1159 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
1160 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
1161 | * This needs to be checked here. | ||
1162 | */ | ||
1163 | unsigned long old, new; | ||
1164 | |||
1165 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
1166 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1167 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1168 | |||
1169 | if (old == new) { | ||
1170 | /* cr0 write with ts and mp unchanged */ | ||
1171 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
1172 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) | ||
1173 | return; | ||
1174 | } | ||
1175 | } | ||
1176 | |||
999 | #ifdef CONFIG_X86_64 | 1177 | #ifdef CONFIG_X86_64 |
1000 | if (vcpu->arch.efer & EFER_LME) { | 1178 | if (vcpu->arch.efer & EFER_LME) { |
1001 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1179 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
@@ -1129,70 +1307,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1129 | svm->vmcb->control.asid = sd->next_asid++; | 1307 | svm->vmcb->control.asid = sd->next_asid++; |
1130 | } | 1308 | } |
1131 | 1309 | ||
1132 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) | 1310 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1133 | { | ||
1134 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1135 | |||
1136 | switch (dr) { | ||
1137 | case 0 ... 3: | ||
1138 | *dest = vcpu->arch.db[dr]; | ||
1139 | break; | ||
1140 | case 4: | ||
1141 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1142 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1143 | /* fall through */ | ||
1144 | case 6: | ||
1145 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1146 | *dest = vcpu->arch.dr6; | ||
1147 | else | ||
1148 | *dest = svm->vmcb->save.dr6; | ||
1149 | break; | ||
1150 | case 5: | ||
1151 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1152 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1153 | /* fall through */ | ||
1154 | case 7: | ||
1155 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1156 | *dest = vcpu->arch.dr7; | ||
1157 | else | ||
1158 | *dest = svm->vmcb->save.dr7; | ||
1159 | break; | ||
1160 | } | ||
1161 | |||
1162 | return EMULATE_DONE; | ||
1163 | } | ||
1164 | |||
1165 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) | ||
1166 | { | 1311 | { |
1167 | struct vcpu_svm *svm = to_svm(vcpu); | 1312 | struct vcpu_svm *svm = to_svm(vcpu); |
1168 | 1313 | ||
1169 | switch (dr) { | 1314 | svm->vmcb->save.dr7 = value; |
1170 | case 0 ... 3: | ||
1171 | vcpu->arch.db[dr] = value; | ||
1172 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
1173 | vcpu->arch.eff_db[dr] = value; | ||
1174 | break; | ||
1175 | case 4: | ||
1176 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1177 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1178 | /* fall through */ | ||
1179 | case 6: | ||
1180 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | ||
1181 | break; | ||
1182 | case 5: | ||
1183 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1184 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1185 | /* fall through */ | ||
1186 | case 7: | ||
1187 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | ||
1188 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
1189 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
1190 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | ||
1191 | } | ||
1192 | break; | ||
1193 | } | ||
1194 | |||
1195 | return EMULATE_DONE; | ||
1196 | } | 1315 | } |
1197 | 1316 | ||
1198 | static int pf_interception(struct vcpu_svm *svm) | 1317 | static int pf_interception(struct vcpu_svm *svm) |
@@ -1229,7 +1348,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
1229 | } | 1348 | } |
1230 | 1349 | ||
1231 | if (svm->vcpu.guest_debug & | 1350 | if (svm->vcpu.guest_debug & |
1232 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ | 1351 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { |
1233 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1352 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1234 | kvm_run->debug.arch.pc = | 1353 | kvm_run->debug.arch.pc = |
1235 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1354 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
@@ -1263,7 +1382,22 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1263 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1382 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1264 | { | 1383 | { |
1265 | struct vcpu_svm *svm = to_svm(vcpu); | 1384 | struct vcpu_svm *svm = to_svm(vcpu); |
1266 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1385 | u32 excp; |
1386 | |||
1387 | if (is_nested(svm)) { | ||
1388 | u32 h_excp, n_excp; | ||
1389 | |||
1390 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
1391 | n_excp = svm->nested.intercept_exceptions; | ||
1392 | h_excp &= ~(1 << NM_VECTOR); | ||
1393 | excp = h_excp | n_excp; | ||
1394 | } else { | ||
1395 | excp = svm->vmcb->control.intercept_exceptions; | ||
1396 | excp &= ~(1 << NM_VECTOR); | ||
1397 | } | ||
1398 | |||
1399 | svm->vmcb->control.intercept_exceptions = excp; | ||
1400 | |||
1267 | svm->vcpu.fpu_active = 1; | 1401 | svm->vcpu.fpu_active = 1; |
1268 | update_cr0_intercept(svm); | 1402 | update_cr0_intercept(svm); |
1269 | } | 1403 | } |
@@ -1304,29 +1438,23 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1304 | 1438 | ||
1305 | static int io_interception(struct vcpu_svm *svm) | 1439 | static int io_interception(struct vcpu_svm *svm) |
1306 | { | 1440 | { |
1441 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
1307 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1442 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1308 | int size, in, string; | 1443 | int size, in, string; |
1309 | unsigned port; | 1444 | unsigned port; |
1310 | 1445 | ||
1311 | ++svm->vcpu.stat.io_exits; | 1446 | ++svm->vcpu.stat.io_exits; |
1312 | |||
1313 | svm->next_rip = svm->vmcb->control.exit_info_2; | ||
1314 | |||
1315 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1447 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1316 | |||
1317 | if (string) { | ||
1318 | if (emulate_instruction(&svm->vcpu, | ||
1319 | 0, 0, 0) == EMULATE_DO_MMIO) | ||
1320 | return 0; | ||
1321 | return 1; | ||
1322 | } | ||
1323 | |||
1324 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1448 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1449 | if (string || in) | ||
1450 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | ||
1451 | |||
1325 | port = io_info >> 16; | 1452 | port = io_info >> 16; |
1326 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1453 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1327 | 1454 | svm->next_rip = svm->vmcb->control.exit_info_2; | |
1328 | skip_emulated_instruction(&svm->vcpu); | 1455 | skip_emulated_instruction(&svm->vcpu); |
1329 | return kvm_emulate_pio(&svm->vcpu, in, size, port); | 1456 | |
1457 | return kvm_fast_pio_out(vcpu, size, port); | ||
1330 | } | 1458 | } |
1331 | 1459 | ||
1332 | static int nmi_interception(struct vcpu_svm *svm) | 1460 | static int nmi_interception(struct vcpu_svm *svm) |
@@ -1379,6 +1507,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
1379 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 1507 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
1380 | bool has_error_code, u32 error_code) | 1508 | bool has_error_code, u32 error_code) |
1381 | { | 1509 | { |
1510 | int vmexit; | ||
1511 | |||
1382 | if (!is_nested(svm)) | 1512 | if (!is_nested(svm)) |
1383 | return 0; | 1513 | return 0; |
1384 | 1514 | ||
@@ -1387,21 +1517,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1387 | svm->vmcb->control.exit_info_1 = error_code; | 1517 | svm->vmcb->control.exit_info_1 = error_code; |
1388 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | 1518 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; |
1389 | 1519 | ||
1390 | return nested_svm_exit_handled(svm); | 1520 | vmexit = nested_svm_intercept(svm); |
1521 | if (vmexit == NESTED_EXIT_DONE) | ||
1522 | svm->nested.exit_required = true; | ||
1523 | |||
1524 | return vmexit; | ||
1391 | } | 1525 | } |
1392 | 1526 | ||
1393 | static inline int nested_svm_intr(struct vcpu_svm *svm) | 1527 | /* This function returns true if it is safe to enable the irq window */ |
1528 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | ||
1394 | { | 1529 | { |
1395 | if (!is_nested(svm)) | 1530 | if (!is_nested(svm)) |
1396 | return 0; | 1531 | return true; |
1397 | 1532 | ||
1398 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1533 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
1399 | return 0; | 1534 | return true; |
1400 | 1535 | ||
1401 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | 1536 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) |
1402 | return 0; | 1537 | return false; |
1403 | 1538 | ||
1404 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1539 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1540 | svm->vmcb->control.exit_info_1 = 0; | ||
1541 | svm->vmcb->control.exit_info_2 = 0; | ||
1405 | 1542 | ||
1406 | if (svm->nested.intercept & 1ULL) { | 1543 | if (svm->nested.intercept & 1ULL) { |
1407 | /* | 1544 | /* |
@@ -1412,21 +1549,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1412 | */ | 1549 | */ |
1413 | svm->nested.exit_required = true; | 1550 | svm->nested.exit_required = true; |
1414 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | 1551 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
1415 | return 1; | 1552 | return false; |
1416 | } | 1553 | } |
1417 | 1554 | ||
1418 | return 0; | 1555 | return true; |
1556 | } | ||
1557 | |||
1558 | /* This function returns true if it is safe to enable the nmi window */ | ||
1559 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | ||
1560 | { | ||
1561 | if (!is_nested(svm)) | ||
1562 | return true; | ||
1563 | |||
1564 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | ||
1565 | return true; | ||
1566 | |||
1567 | svm->vmcb->control.exit_code = SVM_EXIT_NMI; | ||
1568 | svm->nested.exit_required = true; | ||
1569 | |||
1570 | return false; | ||
1419 | } | 1571 | } |
1420 | 1572 | ||
1421 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | 1573 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) |
1422 | { | 1574 | { |
1423 | struct page *page; | 1575 | struct page *page; |
1424 | 1576 | ||
1577 | might_sleep(); | ||
1578 | |||
1425 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1579 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1426 | if (is_error_page(page)) | 1580 | if (is_error_page(page)) |
1427 | goto error; | 1581 | goto error; |
1428 | 1582 | ||
1429 | return kmap_atomic(page, idx); | 1583 | *_page = page; |
1584 | |||
1585 | return kmap(page); | ||
1430 | 1586 | ||
1431 | error: | 1587 | error: |
1432 | kvm_release_page_clean(page); | 1588 | kvm_release_page_clean(page); |
@@ -1435,61 +1591,55 @@ error: | |||
1435 | return NULL; | 1591 | return NULL; |
1436 | } | 1592 | } |
1437 | 1593 | ||
1438 | static void nested_svm_unmap(void *addr, enum km_type idx) | 1594 | static void nested_svm_unmap(struct page *page) |
1439 | { | 1595 | { |
1440 | struct page *page; | 1596 | kunmap(page); |
1597 | kvm_release_page_dirty(page); | ||
1598 | } | ||
1441 | 1599 | ||
1442 | if (!addr) | 1600 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
1443 | return; | 1601 | { |
1602 | unsigned port; | ||
1603 | u8 val, bit; | ||
1604 | u64 gpa; | ||
1444 | 1605 | ||
1445 | page = kmap_atomic_to_page(addr); | 1606 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
1607 | return NESTED_EXIT_HOST; | ||
1446 | 1608 | ||
1447 | kunmap_atomic(addr, idx); | 1609 | port = svm->vmcb->control.exit_info_1 >> 16; |
1448 | kvm_release_page_dirty(page); | 1610 | gpa = svm->nested.vmcb_iopm + (port / 8); |
1611 | bit = port % 8; | ||
1612 | val = 0; | ||
1613 | |||
1614 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | ||
1615 | val &= (1 << bit); | ||
1616 | |||
1617 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | ||
1449 | } | 1618 | } |
1450 | 1619 | ||
1451 | static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 1620 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
1452 | { | 1621 | { |
1453 | u32 param = svm->vmcb->control.exit_info_1 & 1; | 1622 | u32 offset, msr, value; |
1454 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1623 | int write, mask; |
1455 | bool ret = false; | ||
1456 | u32 t0, t1; | ||
1457 | u8 *msrpm; | ||
1458 | 1624 | ||
1459 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) | 1625 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1460 | return false; | 1626 | return NESTED_EXIT_HOST; |
1461 | |||
1462 | msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | ||
1463 | 1627 | ||
1464 | if (!msrpm) | 1628 | msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
1465 | goto out; | 1629 | offset = svm_msrpm_offset(msr); |
1630 | write = svm->vmcb->control.exit_info_1 & 1; | ||
1631 | mask = 1 << ((2 * (msr & 0xf)) + write); | ||
1466 | 1632 | ||
1467 | switch (msr) { | 1633 | if (offset == MSR_INVALID) |
1468 | case 0 ... 0x1fff: | 1634 | return NESTED_EXIT_DONE; |
1469 | t0 = (msr * 2) % 8; | ||
1470 | t1 = msr / 8; | ||
1471 | break; | ||
1472 | case 0xc0000000 ... 0xc0001fff: | ||
1473 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
1474 | t1 = (t0 / 8); | ||
1475 | t0 %= 8; | ||
1476 | break; | ||
1477 | case 0xc0010000 ... 0xc0011fff: | ||
1478 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
1479 | t1 = (t0 / 8); | ||
1480 | t0 %= 8; | ||
1481 | break; | ||
1482 | default: | ||
1483 | ret = true; | ||
1484 | goto out; | ||
1485 | } | ||
1486 | 1635 | ||
1487 | ret = msrpm[t1] & ((1 << param) << t0); | 1636 | /* Offset is in 32 bit units but need in 8 bit units */ |
1637 | offset *= 4; | ||
1488 | 1638 | ||
1489 | out: | 1639 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) |
1490 | nested_svm_unmap(msrpm, KM_USER0); | 1640 | return NESTED_EXIT_DONE; |
1491 | 1641 | ||
1492 | return ret; | 1642 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
1493 | } | 1643 | } |
1494 | 1644 | ||
1495 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 1645 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
@@ -1500,16 +1650,19 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1500 | case SVM_EXIT_INTR: | 1650 | case SVM_EXIT_INTR: |
1501 | case SVM_EXIT_NMI: | 1651 | case SVM_EXIT_NMI: |
1502 | return NESTED_EXIT_HOST; | 1652 | return NESTED_EXIT_HOST; |
1503 | /* For now we are always handling NPFs when using them */ | ||
1504 | case SVM_EXIT_NPF: | 1653 | case SVM_EXIT_NPF: |
1654 | /* For now we are always handling NPFs when using them */ | ||
1505 | if (npt_enabled) | 1655 | if (npt_enabled) |
1506 | return NESTED_EXIT_HOST; | 1656 | return NESTED_EXIT_HOST; |
1507 | break; | 1657 | break; |
1508 | /* When we're shadowing, trap PFs */ | ||
1509 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1658 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
1659 | /* When we're shadowing, trap PFs */ | ||
1510 | if (!npt_enabled) | 1660 | if (!npt_enabled) |
1511 | return NESTED_EXIT_HOST; | 1661 | return NESTED_EXIT_HOST; |
1512 | break; | 1662 | break; |
1663 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
1664 | nm_interception(svm); | ||
1665 | break; | ||
1513 | default: | 1666 | default: |
1514 | break; | 1667 | break; |
1515 | } | 1668 | } |
@@ -1520,7 +1673,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1520 | /* | 1673 | /* |
1521 | * If this function returns true, this #vmexit was already handled | 1674 | * If this function returns true, this #vmexit was already handled |
1522 | */ | 1675 | */ |
1523 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | 1676 | static int nested_svm_intercept(struct vcpu_svm *svm) |
1524 | { | 1677 | { |
1525 | u32 exit_code = svm->vmcb->control.exit_code; | 1678 | u32 exit_code = svm->vmcb->control.exit_code; |
1526 | int vmexit = NESTED_EXIT_HOST; | 1679 | int vmexit = NESTED_EXIT_HOST; |
@@ -1529,6 +1682,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1529 | case SVM_EXIT_MSR: | 1682 | case SVM_EXIT_MSR: |
1530 | vmexit = nested_svm_exit_handled_msr(svm); | 1683 | vmexit = nested_svm_exit_handled_msr(svm); |
1531 | break; | 1684 | break; |
1685 | case SVM_EXIT_IOIO: | ||
1686 | vmexit = nested_svm_intercept_ioio(svm); | ||
1687 | break; | ||
1532 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 1688 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { |
1533 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 1689 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); |
1534 | if (svm->nested.intercept_cr_read & cr_bits) | 1690 | if (svm->nested.intercept_cr_read & cr_bits) |
@@ -1566,9 +1722,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1566 | } | 1722 | } |
1567 | } | 1723 | } |
1568 | 1724 | ||
1569 | if (vmexit == NESTED_EXIT_DONE) { | 1725 | return vmexit; |
1726 | } | ||
1727 | |||
1728 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | ||
1729 | { | ||
1730 | int vmexit; | ||
1731 | |||
1732 | vmexit = nested_svm_intercept(svm); | ||
1733 | |||
1734 | if (vmexit == NESTED_EXIT_DONE) | ||
1570 | nested_svm_vmexit(svm); | 1735 | nested_svm_vmexit(svm); |
1571 | } | ||
1572 | 1736 | ||
1573 | return vmexit; | 1737 | return vmexit; |
1574 | } | 1738 | } |
@@ -1610,6 +1774,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1610 | struct vmcb *nested_vmcb; | 1774 | struct vmcb *nested_vmcb; |
1611 | struct vmcb *hsave = svm->nested.hsave; | 1775 | struct vmcb *hsave = svm->nested.hsave; |
1612 | struct vmcb *vmcb = svm->vmcb; | 1776 | struct vmcb *vmcb = svm->vmcb; |
1777 | struct page *page; | ||
1613 | 1778 | ||
1614 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 1779 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
1615 | vmcb->control.exit_info_1, | 1780 | vmcb->control.exit_info_1, |
@@ -1617,10 +1782,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1617 | vmcb->control.exit_int_info, | 1782 | vmcb->control.exit_int_info, |
1618 | vmcb->control.exit_int_info_err); | 1783 | vmcb->control.exit_int_info_err); |
1619 | 1784 | ||
1620 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1785 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
1621 | if (!nested_vmcb) | 1786 | if (!nested_vmcb) |
1622 | return 1; | 1787 | return 1; |
1623 | 1788 | ||
1789 | /* Exit nested SVM mode */ | ||
1790 | svm->nested.vmcb = 0; | ||
1791 | |||
1624 | /* Give the current vmcb to the guest */ | 1792 | /* Give the current vmcb to the guest */ |
1625 | disable_gif(svm); | 1793 | disable_gif(svm); |
1626 | 1794 | ||
@@ -1630,9 +1798,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1630 | nested_vmcb->save.ds = vmcb->save.ds; | 1798 | nested_vmcb->save.ds = vmcb->save.ds; |
1631 | nested_vmcb->save.gdtr = vmcb->save.gdtr; | 1799 | nested_vmcb->save.gdtr = vmcb->save.gdtr; |
1632 | nested_vmcb->save.idtr = vmcb->save.idtr; | 1800 | nested_vmcb->save.idtr = vmcb->save.idtr; |
1801 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); | ||
1633 | if (npt_enabled) | 1802 | if (npt_enabled) |
1634 | nested_vmcb->save.cr3 = vmcb->save.cr3; | 1803 | nested_vmcb->save.cr3 = vmcb->save.cr3; |
1804 | else | ||
1805 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; | ||
1635 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 1806 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
1807 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | ||
1636 | nested_vmcb->save.rflags = vmcb->save.rflags; | 1808 | nested_vmcb->save.rflags = vmcb->save.rflags; |
1637 | nested_vmcb->save.rip = vmcb->save.rip; | 1809 | nested_vmcb->save.rip = vmcb->save.rip; |
1638 | nested_vmcb->save.rsp = vmcb->save.rsp; | 1810 | nested_vmcb->save.rsp = vmcb->save.rsp; |
@@ -1704,10 +1876,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1704 | svm->vmcb->save.cpl = 0; | 1876 | svm->vmcb->save.cpl = 0; |
1705 | svm->vmcb->control.exit_int_info = 0; | 1877 | svm->vmcb->control.exit_int_info = 0; |
1706 | 1878 | ||
1707 | /* Exit nested SVM mode */ | 1879 | nested_svm_unmap(page); |
1708 | svm->nested.vmcb = 0; | ||
1709 | |||
1710 | nested_svm_unmap(nested_vmcb, KM_USER0); | ||
1711 | 1880 | ||
1712 | kvm_mmu_reset_context(&svm->vcpu); | 1881 | kvm_mmu_reset_context(&svm->vcpu); |
1713 | kvm_mmu_load(&svm->vcpu); | 1882 | kvm_mmu_load(&svm->vcpu); |
@@ -1717,19 +1886,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1717 | 1886 | ||
1718 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | 1887 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) |
1719 | { | 1888 | { |
1720 | u32 *nested_msrpm; | 1889 | /* |
1890 | * This function merges the msr permission bitmaps of kvm and the | ||
1891 | * nested vmcb. It is optimized in that it only merges the parts where | ||
1892 | * the kvm msr permission bitmap may contain zero bits | ||
1893 | */ | ||
1721 | int i; | 1894 | int i; |
1722 | 1895 | ||
1723 | nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1896 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1724 | if (!nested_msrpm) | 1897 | return true; |
1725 | return false; | ||
1726 | 1898 | ||
1727 | for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) | 1899 | for (i = 0; i < MSRPM_OFFSETS; i++) { |
1728 | svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; | 1900 | u32 value, p; |
1901 | u64 offset; | ||
1729 | 1902 | ||
1730 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 1903 | if (msrpm_offsets[i] == 0xffffffff) |
1904 | break; | ||
1905 | |||
1906 | p = msrpm_offsets[i]; | ||
1907 | offset = svm->nested.vmcb_msrpm + (p * 4); | ||
1908 | |||
1909 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | ||
1910 | return false; | ||
1911 | |||
1912 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | ||
1913 | } | ||
1731 | 1914 | ||
1732 | nested_svm_unmap(nested_msrpm, KM_USER0); | 1915 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); |
1733 | 1916 | ||
1734 | return true; | 1917 | return true; |
1735 | } | 1918 | } |
@@ -1739,26 +1922,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1739 | struct vmcb *nested_vmcb; | 1922 | struct vmcb *nested_vmcb; |
1740 | struct vmcb *hsave = svm->nested.hsave; | 1923 | struct vmcb *hsave = svm->nested.hsave; |
1741 | struct vmcb *vmcb = svm->vmcb; | 1924 | struct vmcb *vmcb = svm->vmcb; |
1925 | struct page *page; | ||
1926 | u64 vmcb_gpa; | ||
1927 | |||
1928 | vmcb_gpa = svm->vmcb->save.rax; | ||
1742 | 1929 | ||
1743 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 1930 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1744 | if (!nested_vmcb) | 1931 | if (!nested_vmcb) |
1745 | return false; | 1932 | return false; |
1746 | 1933 | ||
1747 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1934 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, |
1748 | svm->nested.vmcb = svm->vmcb->save.rax; | ||
1749 | |||
1750 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1751 | nested_vmcb->save.rip, | 1935 | nested_vmcb->save.rip, |
1752 | nested_vmcb->control.int_ctl, | 1936 | nested_vmcb->control.int_ctl, |
1753 | nested_vmcb->control.event_inj, | 1937 | nested_vmcb->control.event_inj, |
1754 | nested_vmcb->control.nested_ctl); | 1938 | nested_vmcb->control.nested_ctl); |
1755 | 1939 | ||
1940 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | ||
1941 | nested_vmcb->control.intercept_cr_write, | ||
1942 | nested_vmcb->control.intercept_exceptions, | ||
1943 | nested_vmcb->control.intercept); | ||
1944 | |||
1756 | /* Clear internal status */ | 1945 | /* Clear internal status */ |
1757 | kvm_clear_exception_queue(&svm->vcpu); | 1946 | kvm_clear_exception_queue(&svm->vcpu); |
1758 | kvm_clear_interrupt_queue(&svm->vcpu); | 1947 | kvm_clear_interrupt_queue(&svm->vcpu); |
1759 | 1948 | ||
1760 | /* Save the old vmcb, so we don't need to pick what we save, but | 1949 | /* |
1761 | can restore everything when a VMEXIT occurs */ | 1950 | * Save the old vmcb, so we don't need to pick what we save, but can |
1951 | * restore everything when a VMEXIT occurs | ||
1952 | */ | ||
1762 | hsave->save.es = vmcb->save.es; | 1953 | hsave->save.es = vmcb->save.es; |
1763 | hsave->save.cs = vmcb->save.cs; | 1954 | hsave->save.cs = vmcb->save.cs; |
1764 | hsave->save.ss = vmcb->save.ss; | 1955 | hsave->save.ss = vmcb->save.ss; |
@@ -1798,14 +1989,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1798 | if (npt_enabled) { | 1989 | if (npt_enabled) { |
1799 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 1990 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
1800 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 1991 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
1801 | } else { | 1992 | } else |
1802 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 1993 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
1803 | kvm_mmu_reset_context(&svm->vcpu); | 1994 | |
1804 | } | 1995 | /* Guest paging mode is active - reset mmu */ |
1996 | kvm_mmu_reset_context(&svm->vcpu); | ||
1997 | |||
1805 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 1998 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
1806 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 1999 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); |
1807 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 2000 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); |
1808 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 2001 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); |
2002 | |||
1809 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 2003 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
1810 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 2004 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
1811 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; | 2005 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; |
@@ -1814,22 +2008,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1814 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; | 2008 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; |
1815 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; | 2009 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; |
1816 | 2010 | ||
1817 | /* We don't want a nested guest to be more powerful than the guest, | 2011 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; |
1818 | so all intercepts are ORed */ | 2012 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
1819 | svm->vmcb->control.intercept_cr_read |= | ||
1820 | nested_vmcb->control.intercept_cr_read; | ||
1821 | svm->vmcb->control.intercept_cr_write |= | ||
1822 | nested_vmcb->control.intercept_cr_write; | ||
1823 | svm->vmcb->control.intercept_dr_read |= | ||
1824 | nested_vmcb->control.intercept_dr_read; | ||
1825 | svm->vmcb->control.intercept_dr_write |= | ||
1826 | nested_vmcb->control.intercept_dr_write; | ||
1827 | svm->vmcb->control.intercept_exceptions |= | ||
1828 | nested_vmcb->control.intercept_exceptions; | ||
1829 | |||
1830 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
1831 | |||
1832 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; | ||
1833 | 2013 | ||
1834 | /* cache intercepts */ | 2014 | /* cache intercepts */ |
1835 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2015 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; |
@@ -1846,13 +2026,40 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1846 | else | 2026 | else |
1847 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 2027 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1848 | 2028 | ||
2029 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | ||
2030 | /* We only want the cr8 intercept bits of the guest */ | ||
2031 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | ||
2032 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
2033 | } | ||
2034 | |||
2035 | /* | ||
2036 | * We don't want a nested guest to be more powerful than the guest, so | ||
2037 | * all intercepts are ORed | ||
2038 | */ | ||
2039 | svm->vmcb->control.intercept_cr_read |= | ||
2040 | nested_vmcb->control.intercept_cr_read; | ||
2041 | svm->vmcb->control.intercept_cr_write |= | ||
2042 | nested_vmcb->control.intercept_cr_write; | ||
2043 | svm->vmcb->control.intercept_dr_read |= | ||
2044 | nested_vmcb->control.intercept_dr_read; | ||
2045 | svm->vmcb->control.intercept_dr_write |= | ||
2046 | nested_vmcb->control.intercept_dr_write; | ||
2047 | svm->vmcb->control.intercept_exceptions |= | ||
2048 | nested_vmcb->control.intercept_exceptions; | ||
2049 | |||
2050 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
2051 | |||
2052 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | ||
1849 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2053 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1850 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 2054 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1851 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 2055 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1852 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 2056 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1853 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 2057 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1854 | 2058 | ||
1855 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2059 | nested_svm_unmap(page); |
2060 | |||
2061 | /* nested_vmcb is our indicator if nested SVM is activated */ | ||
2062 | svm->nested.vmcb = vmcb_gpa; | ||
1856 | 2063 | ||
1857 | enable_gif(svm); | 2064 | enable_gif(svm); |
1858 | 2065 | ||
@@ -1878,6 +2085,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1878 | static int vmload_interception(struct vcpu_svm *svm) | 2085 | static int vmload_interception(struct vcpu_svm *svm) |
1879 | { | 2086 | { |
1880 | struct vmcb *nested_vmcb; | 2087 | struct vmcb *nested_vmcb; |
2088 | struct page *page; | ||
1881 | 2089 | ||
1882 | if (nested_svm_check_permissions(svm)) | 2090 | if (nested_svm_check_permissions(svm)) |
1883 | return 1; | 2091 | return 1; |
@@ -1885,12 +2093,12 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1885 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2093 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1886 | skip_emulated_instruction(&svm->vcpu); | 2094 | skip_emulated_instruction(&svm->vcpu); |
1887 | 2095 | ||
1888 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2096 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1889 | if (!nested_vmcb) | 2097 | if (!nested_vmcb) |
1890 | return 1; | 2098 | return 1; |
1891 | 2099 | ||
1892 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2100 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
1893 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2101 | nested_svm_unmap(page); |
1894 | 2102 | ||
1895 | return 1; | 2103 | return 1; |
1896 | } | 2104 | } |
@@ -1898,6 +2106,7 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1898 | static int vmsave_interception(struct vcpu_svm *svm) | 2106 | static int vmsave_interception(struct vcpu_svm *svm) |
1899 | { | 2107 | { |
1900 | struct vmcb *nested_vmcb; | 2108 | struct vmcb *nested_vmcb; |
2109 | struct page *page; | ||
1901 | 2110 | ||
1902 | if (nested_svm_check_permissions(svm)) | 2111 | if (nested_svm_check_permissions(svm)) |
1903 | return 1; | 2112 | return 1; |
@@ -1905,12 +2114,12 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
1905 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2114 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1906 | skip_emulated_instruction(&svm->vcpu); | 2115 | skip_emulated_instruction(&svm->vcpu); |
1907 | 2116 | ||
1908 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2117 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1909 | if (!nested_vmcb) | 2118 | if (!nested_vmcb) |
1910 | return 1; | 2119 | return 1; |
1911 | 2120 | ||
1912 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2121 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
1913 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2122 | nested_svm_unmap(page); |
1914 | 2123 | ||
1915 | return 1; | 2124 | return 1; |
1916 | } | 2125 | } |
@@ -2013,6 +2222,8 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2013 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | 2222 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; |
2014 | uint32_t idt_v = | 2223 | uint32_t idt_v = |
2015 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | 2224 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; |
2225 | bool has_error_code = false; | ||
2226 | u32 error_code = 0; | ||
2016 | 2227 | ||
2017 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 2228 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
2018 | 2229 | ||
@@ -2033,6 +2244,12 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2033 | svm->vcpu.arch.nmi_injected = false; | 2244 | svm->vcpu.arch.nmi_injected = false; |
2034 | break; | 2245 | break; |
2035 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2246 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2247 | if (svm->vmcb->control.exit_info_2 & | ||
2248 | (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { | ||
2249 | has_error_code = true; | ||
2250 | error_code = | ||
2251 | (u32)svm->vmcb->control.exit_info_2; | ||
2252 | } | ||
2036 | kvm_clear_exception_queue(&svm->vcpu); | 2253 | kvm_clear_exception_queue(&svm->vcpu); |
2037 | break; | 2254 | break; |
2038 | case SVM_EXITINTINFO_TYPE_INTR: | 2255 | case SVM_EXITINTINFO_TYPE_INTR: |
@@ -2049,7 +2266,14 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2049 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2266 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
2050 | skip_emulated_instruction(&svm->vcpu); | 2267 | skip_emulated_instruction(&svm->vcpu); |
2051 | 2268 | ||
2052 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2269 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, |
2270 | has_error_code, error_code) == EMULATE_FAIL) { | ||
2271 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
2272 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
2273 | svm->vcpu.run->internal.ndata = 0; | ||
2274 | return 0; | ||
2275 | } | ||
2276 | return 1; | ||
2053 | } | 2277 | } |
2054 | 2278 | ||
2055 | static int cpuid_interception(struct vcpu_svm *svm) | 2279 | static int cpuid_interception(struct vcpu_svm *svm) |
@@ -2140,9 +2364,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2140 | case MSR_IA32_SYSENTER_ESP: | 2364 | case MSR_IA32_SYSENTER_ESP: |
2141 | *data = svm->sysenter_esp; | 2365 | *data = svm->sysenter_esp; |
2142 | break; | 2366 | break; |
2143 | /* Nobody will change the following 5 values in the VMCB so | 2367 | /* |
2144 | we can safely return them on rdmsr. They will always be 0 | 2368 | * Nobody will change the following 5 values in the VMCB so we can |
2145 | until LBRV is implemented. */ | 2369 | * safely return them on rdmsr. They will always be 0 until LBRV is |
2370 | * implemented. | ||
2371 | */ | ||
2146 | case MSR_IA32_DEBUGCTLMSR: | 2372 | case MSR_IA32_DEBUGCTLMSR: |
2147 | *data = svm->vmcb->save.dbgctl; | 2373 | *data = svm->vmcb->save.dbgctl; |
2148 | break; | 2374 | break; |
@@ -2162,7 +2388,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2162 | *data = svm->nested.hsave_msr; | 2388 | *data = svm->nested.hsave_msr; |
2163 | break; | 2389 | break; |
2164 | case MSR_VM_CR: | 2390 | case MSR_VM_CR: |
2165 | *data = 0; | 2391 | *data = svm->nested.vm_cr_msr; |
2166 | break; | 2392 | break; |
2167 | case MSR_IA32_UCODE_REV: | 2393 | case MSR_IA32_UCODE_REV: |
2168 | *data = 0x01000065; | 2394 | *data = 0x01000065; |
@@ -2192,6 +2418,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
2192 | return 1; | 2418 | return 1; |
2193 | } | 2419 | } |
2194 | 2420 | ||
2421 | static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) | ||
2422 | { | ||
2423 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2424 | int svm_dis, chg_mask; | ||
2425 | |||
2426 | if (data & ~SVM_VM_CR_VALID_MASK) | ||
2427 | return 1; | ||
2428 | |||
2429 | chg_mask = SVM_VM_CR_VALID_MASK; | ||
2430 | |||
2431 | if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) | ||
2432 | chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); | ||
2433 | |||
2434 | svm->nested.vm_cr_msr &= ~chg_mask; | ||
2435 | svm->nested.vm_cr_msr |= (data & chg_mask); | ||
2436 | |||
2437 | svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; | ||
2438 | |||
2439 | /* check for svm_disable while efer.svme is set */ | ||
2440 | if (svm_dis && (vcpu->arch.efer & EFER_SVME)) | ||
2441 | return 1; | ||
2442 | |||
2443 | return 0; | ||
2444 | } | ||
2445 | |||
2195 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 2446 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
2196 | { | 2447 | { |
2197 | struct vcpu_svm *svm = to_svm(vcpu); | 2448 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -2258,6 +2509,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2258 | svm->nested.hsave_msr = data; | 2509 | svm->nested.hsave_msr = data; |
2259 | break; | 2510 | break; |
2260 | case MSR_VM_CR: | 2511 | case MSR_VM_CR: |
2512 | return svm_set_vm_cr(vcpu, data); | ||
2261 | case MSR_VM_IGNNE: | 2513 | case MSR_VM_IGNNE: |
2262 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 2514 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
2263 | break; | 2515 | break; |
@@ -2321,16 +2573,16 @@ static int pause_interception(struct vcpu_svm *svm) | |||
2321 | } | 2573 | } |
2322 | 2574 | ||
2323 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 2575 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
2324 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2576 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2325 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2577 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2326 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2578 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2327 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2579 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2328 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 2580 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2329 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2581 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2330 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2582 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2331 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2583 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
2332 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 2584 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
2333 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 2585 | [SVM_EXIT_READ_DR0] = emulate_on_interception, |
2334 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2586 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2335 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2587 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2336 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2588 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
@@ -2349,15 +2601,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2349 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2601 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2350 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2602 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
2351 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 2603 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
2352 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 2604 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
2353 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 2605 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
2354 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 2606 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
2355 | [SVM_EXIT_INTR] = intr_interception, | 2607 | [SVM_EXIT_INTR] = intr_interception, |
2356 | [SVM_EXIT_NMI] = nmi_interception, | 2608 | [SVM_EXIT_NMI] = nmi_interception, |
2357 | [SVM_EXIT_SMI] = nop_on_interception, | 2609 | [SVM_EXIT_SMI] = nop_on_interception, |
2358 | [SVM_EXIT_INIT] = nop_on_interception, | 2610 | [SVM_EXIT_INIT] = nop_on_interception, |
2359 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2611 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
2360 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | ||
2361 | [SVM_EXIT_CPUID] = cpuid_interception, | 2612 | [SVM_EXIT_CPUID] = cpuid_interception, |
2362 | [SVM_EXIT_IRET] = iret_interception, | 2613 | [SVM_EXIT_IRET] = iret_interception, |
2363 | [SVM_EXIT_INVD] = emulate_on_interception, | 2614 | [SVM_EXIT_INVD] = emulate_on_interception, |
@@ -2365,7 +2616,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2365 | [SVM_EXIT_HLT] = halt_interception, | 2616 | [SVM_EXIT_HLT] = halt_interception, |
2366 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2617 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2367 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2618 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
2368 | [SVM_EXIT_IOIO] = io_interception, | 2619 | [SVM_EXIT_IOIO] = io_interception, |
2369 | [SVM_EXIT_MSR] = msr_interception, | 2620 | [SVM_EXIT_MSR] = msr_interception, |
2370 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, | 2621 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, |
2371 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, | 2622 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, |
@@ -2388,7 +2639,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2388 | struct kvm_run *kvm_run = vcpu->run; | 2639 | struct kvm_run *kvm_run = vcpu->run; |
2389 | u32 exit_code = svm->vmcb->control.exit_code; | 2640 | u32 exit_code = svm->vmcb->control.exit_code; |
2390 | 2641 | ||
2391 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2642 | trace_kvm_exit(exit_code, vcpu); |
2392 | 2643 | ||
2393 | if (unlikely(svm->nested.exit_required)) { | 2644 | if (unlikely(svm->nested.exit_required)) { |
2394 | nested_svm_vmexit(svm); | 2645 | nested_svm_vmexit(svm); |
@@ -2506,6 +2757,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
2506 | { | 2757 | { |
2507 | struct vcpu_svm *svm = to_svm(vcpu); | 2758 | struct vcpu_svm *svm = to_svm(vcpu); |
2508 | 2759 | ||
2760 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2761 | return; | ||
2762 | |||
2509 | if (irr == -1) | 2763 | if (irr == -1) |
2510 | return; | 2764 | return; |
2511 | 2765 | ||
@@ -2563,13 +2817,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
2563 | { | 2817 | { |
2564 | struct vcpu_svm *svm = to_svm(vcpu); | 2818 | struct vcpu_svm *svm = to_svm(vcpu); |
2565 | 2819 | ||
2566 | nested_svm_intr(svm); | 2820 | /* |
2567 | 2821 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | |
2568 | /* In case GIF=0 we can't rely on the CPU to tell us when | 2822 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
2569 | * GIF becomes 1, because that's a separate STGI/VMRUN intercept. | 2823 | * get that intercept, this function will be called again though and |
2570 | * The next time we get that intercept, this function will be | 2824 | * we'll get the vintr intercept. |
2571 | * called again though and we'll get the vintr intercept. */ | 2825 | */ |
2572 | if (gif_set(svm)) { | 2826 | if (gif_set(svm) && nested_svm_intr(svm)) { |
2573 | svm_set_vintr(svm); | 2827 | svm_set_vintr(svm); |
2574 | svm_inject_irq(svm, 0x0); | 2828 | svm_inject_irq(svm, 0x0); |
2575 | } | 2829 | } |
@@ -2583,12 +2837,15 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2583 | == HF_NMI_MASK) | 2837 | == HF_NMI_MASK) |
2584 | return; /* IRET will cause a vm exit */ | 2838 | return; /* IRET will cause a vm exit */ |
2585 | 2839 | ||
2586 | /* Something prevents NMI from been injected. Single step over | 2840 | /* |
2587 | possible problem (IRET or exception injection or interrupt | 2841 | * Something prevents NMI from been injected. Single step over possible |
2588 | shadow) */ | 2842 | * problem (IRET or exception injection or interrupt shadow) |
2589 | svm->nmi_singlestep = true; | 2843 | */ |
2590 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2844 | if (gif_set(svm) && nested_svm_nmi(svm)) { |
2591 | update_db_intercept(vcpu); | 2845 | svm->nmi_singlestep = true; |
2846 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
2847 | update_db_intercept(vcpu); | ||
2848 | } | ||
2592 | } | 2849 | } |
2593 | 2850 | ||
2594 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2851 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -2609,6 +2866,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
2609 | { | 2866 | { |
2610 | struct vcpu_svm *svm = to_svm(vcpu); | 2867 | struct vcpu_svm *svm = to_svm(vcpu); |
2611 | 2868 | ||
2869 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2870 | return; | ||
2871 | |||
2612 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2872 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
2613 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2873 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
2614 | kvm_set_cr8(vcpu, cr8); | 2874 | kvm_set_cr8(vcpu, cr8); |
@@ -2620,6 +2880,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
2620 | struct vcpu_svm *svm = to_svm(vcpu); | 2880 | struct vcpu_svm *svm = to_svm(vcpu); |
2621 | u64 cr8; | 2881 | u64 cr8; |
2622 | 2882 | ||
2883 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2884 | return; | ||
2885 | |||
2623 | cr8 = kvm_get_cr8(vcpu); | 2886 | cr8 = kvm_get_cr8(vcpu); |
2624 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2887 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
2625 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2888 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
@@ -2630,6 +2893,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2630 | u8 vector; | 2893 | u8 vector; |
2631 | int type; | 2894 | int type; |
2632 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | 2895 | u32 exitintinfo = svm->vmcb->control.exit_int_info; |
2896 | unsigned int3_injected = svm->int3_injected; | ||
2897 | |||
2898 | svm->int3_injected = 0; | ||
2633 | 2899 | ||
2634 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | 2900 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) |
2635 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 2901 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
@@ -2649,12 +2915,21 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2649 | svm->vcpu.arch.nmi_injected = true; | 2915 | svm->vcpu.arch.nmi_injected = true; |
2650 | break; | 2916 | break; |
2651 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2917 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2652 | /* In case of software exception do not reinject an exception | ||
2653 | vector, but re-execute and instruction instead */ | ||
2654 | if (is_nested(svm)) | 2918 | if (is_nested(svm)) |
2655 | break; | 2919 | break; |
2656 | if (kvm_exception_is_soft(vector)) | 2920 | /* |
2921 | * In case of software exceptions, do not reinject the vector, | ||
2922 | * but re-execute the instruction instead. Rewind RIP first | ||
2923 | * if we emulated INT3 before. | ||
2924 | */ | ||
2925 | if (kvm_exception_is_soft(vector)) { | ||
2926 | if (vector == BP_VECTOR && int3_injected && | ||
2927 | kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) | ||
2928 | kvm_rip_write(&svm->vcpu, | ||
2929 | kvm_rip_read(&svm->vcpu) - | ||
2930 | int3_injected); | ||
2657 | break; | 2931 | break; |
2932 | } | ||
2658 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | 2933 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { |
2659 | u32 err = svm->vmcb->control.exit_int_info_err; | 2934 | u32 err = svm->vmcb->control.exit_int_info_err; |
2660 | kvm_queue_exception_e(&svm->vcpu, vector, err); | 2935 | kvm_queue_exception_e(&svm->vcpu, vector, err); |
@@ -2875,24 +3150,24 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
2875 | } | 3150 | } |
2876 | 3151 | ||
2877 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 3152 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2878 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 3153 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2879 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 3154 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
2880 | { SVM_EXIT_READ_CR4, "read_cr4" }, | 3155 | { SVM_EXIT_READ_CR4, "read_cr4" }, |
2881 | { SVM_EXIT_READ_CR8, "read_cr8" }, | 3156 | { SVM_EXIT_READ_CR8, "read_cr8" }, |
2882 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | 3157 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, |
2883 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | 3158 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, |
2884 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | 3159 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, |
2885 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | 3160 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, |
2886 | { SVM_EXIT_READ_DR0, "read_dr0" }, | 3161 | { SVM_EXIT_READ_DR0, "read_dr0" }, |
2887 | { SVM_EXIT_READ_DR1, "read_dr1" }, | 3162 | { SVM_EXIT_READ_DR1, "read_dr1" }, |
2888 | { SVM_EXIT_READ_DR2, "read_dr2" }, | 3163 | { SVM_EXIT_READ_DR2, "read_dr2" }, |
2889 | { SVM_EXIT_READ_DR3, "read_dr3" }, | 3164 | { SVM_EXIT_READ_DR3, "read_dr3" }, |
2890 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | 3165 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, |
2891 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | 3166 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, |
2892 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | 3167 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, |
2893 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | 3168 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, |
2894 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | 3169 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, |
2895 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | 3170 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, |
2896 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | 3171 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, |
2897 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | 3172 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, |
2898 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | 3173 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, |
@@ -2941,8 +3216,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
2941 | { | 3216 | { |
2942 | struct vcpu_svm *svm = to_svm(vcpu); | 3217 | struct vcpu_svm *svm = to_svm(vcpu); |
2943 | 3218 | ||
2944 | update_cr0_intercept(svm); | ||
2945 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3219 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; |
3220 | if (is_nested(svm)) | ||
3221 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
3222 | update_cr0_intercept(svm); | ||
2946 | } | 3223 | } |
2947 | 3224 | ||
2948 | static struct kvm_x86_ops svm_x86_ops = { | 3225 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2981,8 +3258,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2981 | .set_idt = svm_set_idt, | 3258 | .set_idt = svm_set_idt, |
2982 | .get_gdt = svm_get_gdt, | 3259 | .get_gdt = svm_get_gdt, |
2983 | .set_gdt = svm_set_gdt, | 3260 | .set_gdt = svm_set_gdt, |
2984 | .get_dr = svm_get_dr, | 3261 | .set_dr7 = svm_set_dr7, |
2985 | .set_dr = svm_set_dr, | ||
2986 | .cache_reg = svm_cache_reg, | 3262 | .cache_reg = svm_cache_reg, |
2987 | .get_rflags = svm_get_rflags, | 3263 | .get_rflags = svm_get_rflags, |
2988 | .set_rflags = svm_set_rflags, | 3264 | .set_rflags = svm_set_rflags, |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea40439066c..4ddadb1a5ffe 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
12 | /* | 12 | /* |
13 | * There is a race window between reading and incrementing, but we do | 13 | * There is a race window between reading and incrementing, but we do |
14 | * not care about potentially loosing timer events in the !reinject | 14 | * not care about potentially loosing timer events in the !reinject |
15 | * case anyway. | 15 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
16 | * in vcpu_enter_guest. | ||
16 | */ | 17 | */ |
17 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
18 | atomic_inc(&ktimer->pending); | 19 | atomic_inc(&ktimer->pending); |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a29f044..a6544b8e7c0f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -5,8 +5,6 @@ | |||
5 | 5 | ||
6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
8 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
9 | #define TRACE_INCLUDE_FILE trace | ||
10 | 8 | ||
11 | /* | 9 | /* |
12 | * Tracepoint for guest mode entry. | 10 | * Tracepoint for guest mode entry. |
@@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, | |||
184 | * Tracepoint for kvm guest exit: | 182 | * Tracepoint for kvm guest exit: |
185 | */ | 183 | */ |
186 | TRACE_EVENT(kvm_exit, | 184 | TRACE_EVENT(kvm_exit, |
187 | TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), | 185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), |
188 | TP_ARGS(exit_reason, guest_rip), | 186 | TP_ARGS(exit_reason, vcpu), |
189 | 187 | ||
190 | TP_STRUCT__entry( | 188 | TP_STRUCT__entry( |
191 | __field( unsigned int, exit_reason ) | 189 | __field( unsigned int, exit_reason ) |
@@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, | |||
194 | 192 | ||
195 | TP_fast_assign( | 193 | TP_fast_assign( |
196 | __entry->exit_reason = exit_reason; | 194 | __entry->exit_reason = exit_reason; |
197 | __entry->guest_rip = guest_rip; | 195 | __entry->guest_rip = kvm_rip_read(vcpu); |
198 | ), | 196 | ), |
199 | 197 | ||
200 | TP_printk("reason %s rip 0x%lx", | 198 | TP_printk("reason %s rip 0x%lx", |
@@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, | |||
221 | TP_printk("irq %u", __entry->irq) | 219 | TP_printk("irq %u", __entry->irq) |
222 | ); | 220 | ); |
223 | 221 | ||
222 | #define EXS(x) { x##_VECTOR, "#" #x } | ||
223 | |||
224 | #define kvm_trace_sym_exc \ | ||
225 | EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ | ||
226 | EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ | ||
227 | EXS(MF), EXS(MC) | ||
228 | |||
229 | /* | ||
230 | * Tracepoint for kvm interrupt injection: | ||
231 | */ | ||
232 | TRACE_EVENT(kvm_inj_exception, | ||
233 | TP_PROTO(unsigned exception, bool has_error, unsigned error_code), | ||
234 | TP_ARGS(exception, has_error, error_code), | ||
235 | |||
236 | TP_STRUCT__entry( | ||
237 | __field( u8, exception ) | ||
238 | __field( u8, has_error ) | ||
239 | __field( u32, error_code ) | ||
240 | ), | ||
241 | |||
242 | TP_fast_assign( | ||
243 | __entry->exception = exception; | ||
244 | __entry->has_error = has_error; | ||
245 | __entry->error_code = error_code; | ||
246 | ), | ||
247 | |||
248 | TP_printk("%s (0x%x)", | ||
249 | __print_symbolic(__entry->exception, kvm_trace_sym_exc), | ||
250 | /* FIXME: don't print error_code if not present */ | ||
251 | __entry->has_error ? __entry->error_code : 0) | ||
252 | ); | ||
253 | |||
224 | /* | 254 | /* |
225 | * Tracepoint for page fault. | 255 | * Tracepoint for page fault. |
226 | */ | 256 | */ |
@@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, | |||
413 | ), | 443 | ), |
414 | 444 | ||
415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | 445 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " |
416 | "event_inj: 0x%08x npt: %s\n", | 446 | "event_inj: 0x%08x npt: %s", |
417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | 447 | __entry->rip, __entry->vmcb, __entry->nested_rip, |
418 | __entry->int_ctl, __entry->event_inj, | 448 | __entry->int_ctl, __entry->event_inj, |
419 | __entry->npt ? "on" : "off") | 449 | __entry->npt ? "on" : "off") |
420 | ); | 450 | ); |
421 | 451 | ||
452 | TRACE_EVENT(kvm_nested_intercepts, | ||
453 | TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), | ||
454 | TP_ARGS(cr_read, cr_write, exceptions, intercept), | ||
455 | |||
456 | TP_STRUCT__entry( | ||
457 | __field( __u16, cr_read ) | ||
458 | __field( __u16, cr_write ) | ||
459 | __field( __u32, exceptions ) | ||
460 | __field( __u64, intercept ) | ||
461 | ), | ||
462 | |||
463 | TP_fast_assign( | ||
464 | __entry->cr_read = cr_read; | ||
465 | __entry->cr_write = cr_write; | ||
466 | __entry->exceptions = exceptions; | ||
467 | __entry->intercept = intercept; | ||
468 | ), | ||
469 | |||
470 | TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", | ||
471 | __entry->cr_read, __entry->cr_write, __entry->exceptions, | ||
472 | __entry->intercept) | ||
473 | ); | ||
422 | /* | 474 | /* |
423 | * Tracepoint for #VMEXIT while nested | 475 | * Tracepoint for #VMEXIT while nested |
424 | */ | 476 | */ |
@@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
447 | __entry->exit_int_info_err = exit_int_info_err; | 499 | __entry->exit_int_info_err = exit_int_info_err; |
448 | ), | 500 | ), |
449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 501 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 502 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
451 | __entry->rip, | 503 | __entry->rip, |
452 | ftrace_print_symbols_seq(p, __entry->exit_code, | 504 | ftrace_print_symbols_seq(p, __entry->exit_code, |
453 | kvm_x86_ops->exit_reasons_str), | 505 | kvm_x86_ops->exit_reasons_str), |
@@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
482 | ), | 534 | ), |
483 | 535 | ||
484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 536 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 537 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
486 | ftrace_print_symbols_seq(p, __entry->exit_code, | 538 | ftrace_print_symbols_seq(p, __entry->exit_code, |
487 | kvm_x86_ops->exit_reasons_str), | 539 | kvm_x86_ops->exit_reasons_str), |
488 | __entry->exit_info1, __entry->exit_info2, | 540 | __entry->exit_info1, __entry->exit_info2, |
@@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, | |||
504 | __entry->rip = rip | 556 | __entry->rip = rip |
505 | ), | 557 | ), |
506 | 558 | ||
507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | 559 | TP_printk("rip: 0x%016llx", __entry->rip) |
508 | ); | 560 | ); |
509 | 561 | ||
510 | /* | 562 | /* |
@@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, | |||
526 | __entry->address = address; | 578 | __entry->address = address; |
527 | ), | 579 | ), |
528 | 580 | ||
529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | 581 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", |
530 | __entry->rip, __entry->asid, __entry->address) | 582 | __entry->rip, __entry->asid, __entry->address) |
531 | ); | 583 | ); |
532 | 584 | ||
@@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, | |||
547 | __entry->slb = slb; | 599 | __entry->slb = slb; |
548 | ), | 600 | ), |
549 | 601 | ||
550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | 602 | TP_printk("rip: 0x%016llx slb: 0x%08x", |
551 | __entry->rip, __entry->slb) | 603 | __entry->rip, __entry->slb) |
552 | ); | 604 | ); |
553 | 605 | ||
606 | #define __print_insn(insn, ilen) ({ \ | ||
607 | int i; \ | ||
608 | const char *ret = p->buffer + p->len; \ | ||
609 | \ | ||
610 | for (i = 0; i < ilen; ++i) \ | ||
611 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
612 | trace_seq_printf(p, "%c", 0); \ | ||
613 | ret; \ | ||
614 | }) | ||
615 | |||
616 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | ||
617 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | ||
618 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | ||
619 | #define KVM_EMUL_INSN_F_CS_L (1 << 3) | ||
620 | |||
621 | #define kvm_trace_symbol_emul_flags \ | ||
622 | { 0, "real" }, \ | ||
623 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
624 | | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ | ||
625 | { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ | ||
626 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
627 | | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ | ||
628 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
629 | | KVM_EMUL_INSN_F_CS_L, "prot64" } | ||
630 | |||
631 | #define kei_decode_mode(mode) ({ \ | ||
632 | u8 flags = 0xff; \ | ||
633 | switch (mode) { \ | ||
634 | case X86EMUL_MODE_REAL: \ | ||
635 | flags = 0; \ | ||
636 | break; \ | ||
637 | case X86EMUL_MODE_VM86: \ | ||
638 | flags = KVM_EMUL_INSN_F_EFL_VM; \ | ||
639 | break; \ | ||
640 | case X86EMUL_MODE_PROT16: \ | ||
641 | flags = KVM_EMUL_INSN_F_CR0_PE; \ | ||
642 | break; \ | ||
643 | case X86EMUL_MODE_PROT32: \ | ||
644 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
645 | | KVM_EMUL_INSN_F_CS_D; \ | ||
646 | break; \ | ||
647 | case X86EMUL_MODE_PROT64: \ | ||
648 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
649 | | KVM_EMUL_INSN_F_CS_L; \ | ||
650 | break; \ | ||
651 | } \ | ||
652 | flags; \ | ||
653 | }) | ||
654 | |||
655 | TRACE_EVENT(kvm_emulate_insn, | ||
656 | TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), | ||
657 | TP_ARGS(vcpu, failed), | ||
658 | |||
659 | TP_STRUCT__entry( | ||
660 | __field( __u64, rip ) | ||
661 | __field( __u32, csbase ) | ||
662 | __field( __u8, len ) | ||
663 | __array( __u8, insn, 15 ) | ||
664 | __field( __u8, flags ) | ||
665 | __field( __u8, failed ) | ||
666 | ), | ||
667 | |||
668 | TP_fast_assign( | ||
669 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
670 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | ||
671 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | ||
672 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
673 | memcpy(__entry->insn, | ||
674 | vcpu->arch.emulate_ctxt.decode.fetch.data, | ||
675 | 15); | ||
676 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | ||
677 | __entry->failed = failed; | ||
678 | ), | ||
679 | |||
680 | TP_printk("%x:%llx:%s (%s)%s", | ||
681 | __entry->csbase, __entry->rip, | ||
682 | __print_insn(__entry->insn, __entry->len), | ||
683 | __print_symbolic(__entry->flags, | ||
684 | kvm_trace_symbol_emul_flags), | ||
685 | __entry->failed ? " failed" : "" | ||
686 | ) | ||
687 | ); | ||
688 | |||
689 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | ||
690 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | ||
691 | |||
554 | #endif /* _TRACE_KVM_H */ | 692 | #endif /* _TRACE_KVM_H */ |
555 | 693 | ||
694 | #undef TRACE_INCLUDE_PATH | ||
695 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
696 | #undef TRACE_INCLUDE_FILE | ||
697 | #define TRACE_INCLUDE_FILE trace | ||
698 | |||
556 | /* This part must be outside protection */ | 699 | /* This part must be outside protection */ |
557 | #include <trace/define_trace.h> | 700 | #include <trace/define_trace.h> |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 82be6dac3d25..54c0035a63f0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); | |||
77 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | 77 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) |
78 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | 78 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) |
79 | 79 | ||
80 | #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) | ||
81 | |||
80 | /* | 82 | /* |
81 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | 83 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: |
82 | * ple_gap: upper bound on the amount of time between two successive | 84 | * ple_gap: upper bound on the amount of time between two successive |
@@ -131,7 +133,7 @@ struct vcpu_vmx { | |||
131 | } host_state; | 133 | } host_state; |
132 | struct { | 134 | struct { |
133 | int vm86_active; | 135 | int vm86_active; |
134 | u8 save_iopl; | 136 | ulong save_rflags; |
135 | struct kvm_save_segment { | 137 | struct kvm_save_segment { |
136 | u16 selector; | 138 | u16 selector; |
137 | unsigned long base; | 139 | unsigned long base; |
@@ -232,56 +234,56 @@ static const u32 vmx_msr_index[] = { | |||
232 | }; | 234 | }; |
233 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
234 | 236 | ||
235 | static inline int is_page_fault(u32 intr_info) | 237 | static inline bool is_page_fault(u32 intr_info) |
236 | { | 238 | { |
237 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
238 | INTR_INFO_VALID_MASK)) == | 240 | INTR_INFO_VALID_MASK)) == |
239 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 241 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
240 | } | 242 | } |
241 | 243 | ||
242 | static inline int is_no_device(u32 intr_info) | 244 | static inline bool is_no_device(u32 intr_info) |
243 | { | 245 | { |
244 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 246 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
245 | INTR_INFO_VALID_MASK)) == | 247 | INTR_INFO_VALID_MASK)) == |
246 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | 248 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); |
247 | } | 249 | } |
248 | 250 | ||
249 | static inline int is_invalid_opcode(u32 intr_info) | 251 | static inline bool is_invalid_opcode(u32 intr_info) |
250 | { | 252 | { |
251 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 253 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
252 | INTR_INFO_VALID_MASK)) == | 254 | INTR_INFO_VALID_MASK)) == |
253 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | 255 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); |
254 | } | 256 | } |
255 | 257 | ||
256 | static inline int is_external_interrupt(u32 intr_info) | 258 | static inline bool is_external_interrupt(u32 intr_info) |
257 | { | 259 | { |
258 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 260 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
259 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 261 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
260 | } | 262 | } |
261 | 263 | ||
262 | static inline int is_machine_check(u32 intr_info) | 264 | static inline bool is_machine_check(u32 intr_info) |
263 | { | 265 | { |
264 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 266 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
265 | INTR_INFO_VALID_MASK)) == | 267 | INTR_INFO_VALID_MASK)) == |
266 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | 268 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); |
267 | } | 269 | } |
268 | 270 | ||
269 | static inline int cpu_has_vmx_msr_bitmap(void) | 271 | static inline bool cpu_has_vmx_msr_bitmap(void) |
270 | { | 272 | { |
271 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; | 273 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
272 | } | 274 | } |
273 | 275 | ||
274 | static inline int cpu_has_vmx_tpr_shadow(void) | 276 | static inline bool cpu_has_vmx_tpr_shadow(void) |
275 | { | 277 | { |
276 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 278 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
277 | } | 279 | } |
278 | 280 | ||
279 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 281 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) |
280 | { | 282 | { |
281 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 283 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
282 | } | 284 | } |
283 | 285 | ||
284 | static inline int cpu_has_secondary_exec_ctrls(void) | 286 | static inline bool cpu_has_secondary_exec_ctrls(void) |
285 | { | 287 | { |
286 | return vmcs_config.cpu_based_exec_ctrl & | 288 | return vmcs_config.cpu_based_exec_ctrl & |
287 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 289 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -301,80 +303,80 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
301 | 303 | ||
302 | static inline bool cpu_has_vmx_ept_execute_only(void) | 304 | static inline bool cpu_has_vmx_ept_execute_only(void) |
303 | { | 305 | { |
304 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | 306 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
305 | } | 307 | } |
306 | 308 | ||
307 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | 309 | static inline bool cpu_has_vmx_eptp_uncacheable(void) |
308 | { | 310 | { |
309 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | 311 | return vmx_capability.ept & VMX_EPTP_UC_BIT; |
310 | } | 312 | } |
311 | 313 | ||
312 | static inline bool cpu_has_vmx_eptp_writeback(void) | 314 | static inline bool cpu_has_vmx_eptp_writeback(void) |
313 | { | 315 | { |
314 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | 316 | return vmx_capability.ept & VMX_EPTP_WB_BIT; |
315 | } | 317 | } |
316 | 318 | ||
317 | static inline bool cpu_has_vmx_ept_2m_page(void) | 319 | static inline bool cpu_has_vmx_ept_2m_page(void) |
318 | { | 320 | { |
319 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 321 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
320 | } | 322 | } |
321 | 323 | ||
322 | static inline bool cpu_has_vmx_ept_1g_page(void) | 324 | static inline bool cpu_has_vmx_ept_1g_page(void) |
323 | { | 325 | { |
324 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | 326 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
325 | } | 327 | } |
326 | 328 | ||
327 | static inline int cpu_has_vmx_invept_individual_addr(void) | 329 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
328 | { | 330 | { |
329 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 331 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
330 | } | 332 | } |
331 | 333 | ||
332 | static inline int cpu_has_vmx_invept_context(void) | 334 | static inline bool cpu_has_vmx_invept_context(void) |
333 | { | 335 | { |
334 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); | 336 | return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; |
335 | } | 337 | } |
336 | 338 | ||
337 | static inline int cpu_has_vmx_invept_global(void) | 339 | static inline bool cpu_has_vmx_invept_global(void) |
338 | { | 340 | { |
339 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); | 341 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
340 | } | 342 | } |
341 | 343 | ||
342 | static inline int cpu_has_vmx_ept(void) | 344 | static inline bool cpu_has_vmx_ept(void) |
343 | { | 345 | { |
344 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 346 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
345 | SECONDARY_EXEC_ENABLE_EPT; | 347 | SECONDARY_EXEC_ENABLE_EPT; |
346 | } | 348 | } |
347 | 349 | ||
348 | static inline int cpu_has_vmx_unrestricted_guest(void) | 350 | static inline bool cpu_has_vmx_unrestricted_guest(void) |
349 | { | 351 | { |
350 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 352 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
351 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
352 | } | 354 | } |
353 | 355 | ||
354 | static inline int cpu_has_vmx_ple(void) | 356 | static inline bool cpu_has_vmx_ple(void) |
355 | { | 357 | { |
356 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 358 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
357 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
358 | } | 360 | } |
359 | 361 | ||
360 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 362 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) |
361 | { | 363 | { |
362 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 364 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
363 | } | 365 | } |
364 | 366 | ||
365 | static inline int cpu_has_vmx_vpid(void) | 367 | static inline bool cpu_has_vmx_vpid(void) |
366 | { | 368 | { |
367 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 369 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
368 | SECONDARY_EXEC_ENABLE_VPID; | 370 | SECONDARY_EXEC_ENABLE_VPID; |
369 | } | 371 | } |
370 | 372 | ||
371 | static inline int cpu_has_vmx_rdtscp(void) | 373 | static inline bool cpu_has_vmx_rdtscp(void) |
372 | { | 374 | { |
373 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 375 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
374 | SECONDARY_EXEC_RDTSCP; | 376 | SECONDARY_EXEC_RDTSCP; |
375 | } | 377 | } |
376 | 378 | ||
377 | static inline int cpu_has_virtual_nmis(void) | 379 | static inline bool cpu_has_virtual_nmis(void) |
378 | { | 380 | { |
379 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
380 | } | 382 | } |
@@ -598,11 +600,11 @@ static void reload_tss(void) | |||
598 | /* | 600 | /* |
599 | * VT restores TR but not its size. Useless. | 601 | * VT restores TR but not its size. Useless. |
600 | */ | 602 | */ |
601 | struct descriptor_table gdt; | 603 | struct desc_ptr gdt; |
602 | struct desc_struct *descs; | 604 | struct desc_struct *descs; |
603 | 605 | ||
604 | kvm_get_gdt(&gdt); | 606 | native_store_gdt(&gdt); |
605 | descs = (void *)gdt.base; | 607 | descs = (void *)gdt.address; |
606 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 608 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
607 | load_TR_desc(); | 609 | load_TR_desc(); |
608 | } | 610 | } |
@@ -632,6 +634,43 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
632 | return true; | 634 | return true; |
633 | } | 635 | } |
634 | 636 | ||
637 | static unsigned long segment_base(u16 selector) | ||
638 | { | ||
639 | struct desc_ptr gdt; | ||
640 | struct desc_struct *d; | ||
641 | unsigned long table_base; | ||
642 | unsigned long v; | ||
643 | |||
644 | if (!(selector & ~3)) | ||
645 | return 0; | ||
646 | |||
647 | native_store_gdt(&gdt); | ||
648 | table_base = gdt.address; | ||
649 | |||
650 | if (selector & 4) { /* from ldt */ | ||
651 | u16 ldt_selector = kvm_read_ldt(); | ||
652 | |||
653 | if (!(ldt_selector & ~3)) | ||
654 | return 0; | ||
655 | |||
656 | table_base = segment_base(ldt_selector); | ||
657 | } | ||
658 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
659 | v = get_desc_base(d); | ||
660 | #ifdef CONFIG_X86_64 | ||
661 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
662 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
663 | #endif | ||
664 | return v; | ||
665 | } | ||
666 | |||
667 | static inline unsigned long kvm_read_tr_base(void) | ||
668 | { | ||
669 | u16 tr; | ||
670 | asm("str %0" : "=g"(tr)); | ||
671 | return segment_base(tr); | ||
672 | } | ||
673 | |||
635 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 674 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
636 | { | 675 | { |
637 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 676 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -756,7 +795,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
756 | } | 795 | } |
757 | 796 | ||
758 | if (vcpu->cpu != cpu) { | 797 | if (vcpu->cpu != cpu) { |
759 | struct descriptor_table dt; | 798 | struct desc_ptr dt; |
760 | unsigned long sysenter_esp; | 799 | unsigned long sysenter_esp; |
761 | 800 | ||
762 | vcpu->cpu = cpu; | 801 | vcpu->cpu = cpu; |
@@ -765,8 +804,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
765 | * processors. | 804 | * processors. |
766 | */ | 805 | */ |
767 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 806 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
768 | kvm_get_gdt(&dt); | 807 | native_store_gdt(&dt); |
769 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 808 | vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ |
770 | 809 | ||
771 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 810 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
772 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 811 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
@@ -818,18 +857,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
818 | 857 | ||
819 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 858 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
820 | { | 859 | { |
821 | unsigned long rflags; | 860 | unsigned long rflags, save_rflags; |
822 | 861 | ||
823 | rflags = vmcs_readl(GUEST_RFLAGS); | 862 | rflags = vmcs_readl(GUEST_RFLAGS); |
824 | if (to_vmx(vcpu)->rmode.vm86_active) | 863 | if (to_vmx(vcpu)->rmode.vm86_active) { |
825 | rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 864 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
865 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||
866 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||
867 | } | ||
826 | return rflags; | 868 | return rflags; |
827 | } | 869 | } |
828 | 870 | ||
829 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 871 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
830 | { | 872 | { |
831 | if (to_vmx(vcpu)->rmode.vm86_active) | 873 | if (to_vmx(vcpu)->rmode.vm86_active) { |
874 | to_vmx(vcpu)->rmode.save_rflags = rflags; | ||
832 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 875 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
876 | } | ||
833 | vmcs_writel(GUEST_RFLAGS, rflags); | 877 | vmcs_writel(GUEST_RFLAGS, rflags); |
834 | } | 878 | } |
835 | 879 | ||
@@ -839,9 +883,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
839 | int ret = 0; | 883 | int ret = 0; |
840 | 884 | ||
841 | if (interruptibility & GUEST_INTR_STATE_STI) | 885 | if (interruptibility & GUEST_INTR_STATE_STI) |
842 | ret |= X86_SHADOW_INT_STI; | 886 | ret |= KVM_X86_SHADOW_INT_STI; |
843 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 887 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
844 | ret |= X86_SHADOW_INT_MOV_SS; | 888 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
845 | 889 | ||
846 | return ret & mask; | 890 | return ret & mask; |
847 | } | 891 | } |
@@ -853,9 +897,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
853 | 897 | ||
854 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | 898 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); |
855 | 899 | ||
856 | if (mask & X86_SHADOW_INT_MOV_SS) | 900 | if (mask & KVM_X86_SHADOW_INT_MOV_SS) |
857 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | 901 | interruptibility |= GUEST_INTR_STATE_MOV_SS; |
858 | if (mask & X86_SHADOW_INT_STI) | 902 | else if (mask & KVM_X86_SHADOW_INT_STI) |
859 | interruptibility |= GUEST_INTR_STATE_STI; | 903 | interruptibility |= GUEST_INTR_STATE_STI; |
860 | 904 | ||
861 | if ((interruptibility != interruptibility_old)) | 905 | if ((interruptibility != interruptibility_old)) |
@@ -1483,8 +1527,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1483 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1527 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
1484 | 1528 | ||
1485 | flags = vmcs_readl(GUEST_RFLAGS); | 1529 | flags = vmcs_readl(GUEST_RFLAGS); |
1486 | flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 1530 | flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
1487 | flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); | 1531 | flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; |
1488 | vmcs_writel(GUEST_RFLAGS, flags); | 1532 | vmcs_writel(GUEST_RFLAGS, flags); |
1489 | 1533 | ||
1490 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | | 1534 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | |
@@ -1514,7 +1558,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
1514 | struct kvm_memslots *slots; | 1558 | struct kvm_memslots *slots; |
1515 | gfn_t base_gfn; | 1559 | gfn_t base_gfn; |
1516 | 1560 | ||
1517 | slots = rcu_dereference(kvm->memslots); | 1561 | slots = kvm_memslots(kvm); |
1518 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1562 | base_gfn = kvm->memslots->memslots[0].base_gfn + |
1519 | kvm->memslots->memslots[0].npages - 3; | 1563 | kvm->memslots->memslots[0].npages - 3; |
1520 | return base_gfn << PAGE_SHIFT; | 1564 | return base_gfn << PAGE_SHIFT; |
@@ -1557,8 +1601,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1557 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 1601 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
1558 | 1602 | ||
1559 | flags = vmcs_readl(GUEST_RFLAGS); | 1603 | flags = vmcs_readl(GUEST_RFLAGS); |
1560 | vmx->rmode.save_iopl | 1604 | vmx->rmode.save_rflags = flags; |
1561 | = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1562 | 1605 | ||
1563 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1606 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
1564 | 1607 | ||
@@ -1928,28 +1971,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
1928 | *l = (ar >> 13) & 1; | 1971 | *l = (ar >> 13) & 1; |
1929 | } | 1972 | } |
1930 | 1973 | ||
1931 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1974 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1932 | { | 1975 | { |
1933 | dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); | 1976 | dt->size = vmcs_read32(GUEST_IDTR_LIMIT); |
1934 | dt->base = vmcs_readl(GUEST_IDTR_BASE); | 1977 | dt->address = vmcs_readl(GUEST_IDTR_BASE); |
1935 | } | 1978 | } |
1936 | 1979 | ||
1937 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1980 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1938 | { | 1981 | { |
1939 | vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); | 1982 | vmcs_write32(GUEST_IDTR_LIMIT, dt->size); |
1940 | vmcs_writel(GUEST_IDTR_BASE, dt->base); | 1983 | vmcs_writel(GUEST_IDTR_BASE, dt->address); |
1941 | } | 1984 | } |
1942 | 1985 | ||
1943 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1986 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1944 | { | 1987 | { |
1945 | dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); | 1988 | dt->size = vmcs_read32(GUEST_GDTR_LIMIT); |
1946 | dt->base = vmcs_readl(GUEST_GDTR_BASE); | 1989 | dt->address = vmcs_readl(GUEST_GDTR_BASE); |
1947 | } | 1990 | } |
1948 | 1991 | ||
1949 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1992 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1950 | { | 1993 | { |
1951 | vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); | 1994 | vmcs_write32(GUEST_GDTR_LIMIT, dt->size); |
1952 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 1995 | vmcs_writel(GUEST_GDTR_BASE, dt->address); |
1953 | } | 1996 | } |
1954 | 1997 | ||
1955 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | 1998 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) |
@@ -2290,6 +2333,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
2290 | spin_unlock(&vmx_vpid_lock); | 2333 | spin_unlock(&vmx_vpid_lock); |
2291 | } | 2334 | } |
2292 | 2335 | ||
2336 | static void free_vpid(struct vcpu_vmx *vmx) | ||
2337 | { | ||
2338 | if (!enable_vpid) | ||
2339 | return; | ||
2340 | spin_lock(&vmx_vpid_lock); | ||
2341 | if (vmx->vpid != 0) | ||
2342 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
2343 | spin_unlock(&vmx_vpid_lock); | ||
2344 | } | ||
2345 | |||
2293 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 2346 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
2294 | { | 2347 | { |
2295 | int f = sizeof(unsigned long); | 2348 | int f = sizeof(unsigned long); |
@@ -2328,7 +2381,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2328 | u32 junk; | 2381 | u32 junk; |
2329 | u64 host_pat, tsc_this, tsc_base; | 2382 | u64 host_pat, tsc_this, tsc_base; |
2330 | unsigned long a; | 2383 | unsigned long a; |
2331 | struct descriptor_table dt; | 2384 | struct desc_ptr dt; |
2332 | int i; | 2385 | int i; |
2333 | unsigned long kvm_vmx_return; | 2386 | unsigned long kvm_vmx_return; |
2334 | u32 exec_control; | 2387 | u32 exec_control; |
@@ -2409,8 +2462,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2409 | 2462 | ||
2410 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 2463 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
2411 | 2464 | ||
2412 | kvm_get_idt(&dt); | 2465 | native_store_idt(&dt); |
2413 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 2466 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
2414 | 2467 | ||
2415 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 2468 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
2416 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | 2469 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
@@ -2942,22 +2995,20 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
2942 | int size, in, string; | 2995 | int size, in, string; |
2943 | unsigned port; | 2996 | unsigned port; |
2944 | 2997 | ||
2945 | ++vcpu->stat.io_exits; | ||
2946 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 2998 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2947 | string = (exit_qualification & 16) != 0; | 2999 | string = (exit_qualification & 16) != 0; |
3000 | in = (exit_qualification & 8) != 0; | ||
2948 | 3001 | ||
2949 | if (string) { | 3002 | ++vcpu->stat.io_exits; |
2950 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2951 | return 0; | ||
2952 | return 1; | ||
2953 | } | ||
2954 | 3003 | ||
2955 | size = (exit_qualification & 7) + 1; | 3004 | if (string || in) |
2956 | in = (exit_qualification & 8) != 0; | 3005 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); |
2957 | port = exit_qualification >> 16; | ||
2958 | 3006 | ||
3007 | port = exit_qualification >> 16; | ||
3008 | size = (exit_qualification & 7) + 1; | ||
2959 | skip_emulated_instruction(vcpu); | 3009 | skip_emulated_instruction(vcpu); |
2960 | return kvm_emulate_pio(vcpu, in, size, port); | 3010 | |
3011 | return kvm_fast_pio_out(vcpu, size, port); | ||
2961 | } | 3012 | } |
2962 | 3013 | ||
2963 | static void | 3014 | static void |
@@ -3048,19 +3099,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3048 | return 0; | 3099 | return 0; |
3049 | } | 3100 | } |
3050 | 3101 | ||
3051 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
3052 | { | ||
3053 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3054 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3055 | return -1; | ||
3056 | } | ||
3057 | return 0; | ||
3058 | } | ||
3059 | |||
3060 | static int handle_dr(struct kvm_vcpu *vcpu) | 3102 | static int handle_dr(struct kvm_vcpu *vcpu) |
3061 | { | 3103 | { |
3062 | unsigned long exit_qualification; | 3104 | unsigned long exit_qualification; |
3063 | unsigned long val; | ||
3064 | int dr, reg; | 3105 | int dr, reg; |
3065 | 3106 | ||
3066 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | 3107 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ |
@@ -3095,67 +3136,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3095 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 3136 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
3096 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 3137 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
3097 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 3138 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
3098 | switch (dr) { | 3139 | unsigned long val; |
3099 | case 0 ... 3: | 3140 | if (!kvm_get_dr(vcpu, dr, &val)) |
3100 | val = vcpu->arch.db[dr]; | 3141 | kvm_register_write(vcpu, reg, val); |
3101 | break; | 3142 | } else |
3102 | case 4: | 3143 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); |
3103 | if (check_dr_alias(vcpu) < 0) | ||
3104 | return 1; | ||
3105 | /* fall through */ | ||
3106 | case 6: | ||
3107 | val = vcpu->arch.dr6; | ||
3108 | break; | ||
3109 | case 5: | ||
3110 | if (check_dr_alias(vcpu) < 0) | ||
3111 | return 1; | ||
3112 | /* fall through */ | ||
3113 | default: /* 7 */ | ||
3114 | val = vcpu->arch.dr7; | ||
3115 | break; | ||
3116 | } | ||
3117 | kvm_register_write(vcpu, reg, val); | ||
3118 | } else { | ||
3119 | val = vcpu->arch.regs[reg]; | ||
3120 | switch (dr) { | ||
3121 | case 0 ... 3: | ||
3122 | vcpu->arch.db[dr] = val; | ||
3123 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
3124 | vcpu->arch.eff_db[dr] = val; | ||
3125 | break; | ||
3126 | case 4: | ||
3127 | if (check_dr_alias(vcpu) < 0) | ||
3128 | return 1; | ||
3129 | /* fall through */ | ||
3130 | case 6: | ||
3131 | if (val & 0xffffffff00000000ULL) { | ||
3132 | kvm_inject_gp(vcpu, 0); | ||
3133 | return 1; | ||
3134 | } | ||
3135 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
3136 | break; | ||
3137 | case 5: | ||
3138 | if (check_dr_alias(vcpu) < 0) | ||
3139 | return 1; | ||
3140 | /* fall through */ | ||
3141 | default: /* 7 */ | ||
3142 | if (val & 0xffffffff00000000ULL) { | ||
3143 | kvm_inject_gp(vcpu, 0); | ||
3144 | return 1; | ||
3145 | } | ||
3146 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
3147 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
3148 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
3149 | vcpu->arch.switch_db_regs = | ||
3150 | (val & DR7_BP_EN_MASK); | ||
3151 | } | ||
3152 | break; | ||
3153 | } | ||
3154 | } | ||
3155 | skip_emulated_instruction(vcpu); | 3144 | skip_emulated_instruction(vcpu); |
3156 | return 1; | 3145 | return 1; |
3157 | } | 3146 | } |
3158 | 3147 | ||
3148 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||
3149 | { | ||
3150 | vmcs_writel(GUEST_DR7, val); | ||
3151 | } | ||
3152 | |||
3159 | static int handle_cpuid(struct kvm_vcpu *vcpu) | 3153 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3160 | { | 3154 | { |
3161 | kvm_emulate_cpuid(vcpu); | 3155 | kvm_emulate_cpuid(vcpu); |
@@ -3287,6 +3281,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3287 | { | 3281 | { |
3288 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3282 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3289 | unsigned long exit_qualification; | 3283 | unsigned long exit_qualification; |
3284 | bool has_error_code = false; | ||
3285 | u32 error_code = 0; | ||
3290 | u16 tss_selector; | 3286 | u16 tss_selector; |
3291 | int reason, type, idt_v; | 3287 | int reason, type, idt_v; |
3292 | 3288 | ||
@@ -3309,6 +3305,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3309 | kvm_clear_interrupt_queue(vcpu); | 3305 | kvm_clear_interrupt_queue(vcpu); |
3310 | break; | 3306 | break; |
3311 | case INTR_TYPE_HARD_EXCEPTION: | 3307 | case INTR_TYPE_HARD_EXCEPTION: |
3308 | if (vmx->idt_vectoring_info & | ||
3309 | VECTORING_INFO_DELIVER_CODE_MASK) { | ||
3310 | has_error_code = true; | ||
3311 | error_code = | ||
3312 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
3313 | } | ||
3314 | /* fall through */ | ||
3312 | case INTR_TYPE_SOFT_EXCEPTION: | 3315 | case INTR_TYPE_SOFT_EXCEPTION: |
3313 | kvm_clear_exception_queue(vcpu); | 3316 | kvm_clear_exception_queue(vcpu); |
3314 | break; | 3317 | break; |
@@ -3323,8 +3326,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3323 | type != INTR_TYPE_NMI_INTR)) | 3326 | type != INTR_TYPE_NMI_INTR)) |
3324 | skip_emulated_instruction(vcpu); | 3327 | skip_emulated_instruction(vcpu); |
3325 | 3328 | ||
3326 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3329 | if (kvm_task_switch(vcpu, tss_selector, reason, |
3330 | has_error_code, error_code) == EMULATE_FAIL) { | ||
3331 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
3332 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3333 | vcpu->run->internal.ndata = 0; | ||
3327 | return 0; | 3334 | return 0; |
3335 | } | ||
3328 | 3336 | ||
3329 | /* clear all local breakpoint enable flags */ | 3337 | /* clear all local breakpoint enable flags */ |
3330 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); | 3338 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); |
@@ -3569,7 +3577,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3569 | u32 exit_reason = vmx->exit_reason; | 3577 | u32 exit_reason = vmx->exit_reason; |
3570 | u32 vectoring_info = vmx->idt_vectoring_info; | 3578 | u32 vectoring_info = vmx->idt_vectoring_info; |
3571 | 3579 | ||
3572 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3580 | trace_kvm_exit(exit_reason, vcpu); |
3573 | 3581 | ||
3574 | /* If guest state is invalid, start emulating */ | 3582 | /* If guest state is invalid, start emulating */ |
3575 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3583 | if (vmx->emulation_required && emulate_invalid_guest_state) |
@@ -3918,10 +3926,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3918 | { | 3926 | { |
3919 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3927 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3920 | 3928 | ||
3921 | spin_lock(&vmx_vpid_lock); | 3929 | free_vpid(vmx); |
3922 | if (vmx->vpid != 0) | ||
3923 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
3924 | spin_unlock(&vmx_vpid_lock); | ||
3925 | vmx_free_vmcs(vcpu); | 3930 | vmx_free_vmcs(vcpu); |
3926 | kfree(vmx->guest_msrs); | 3931 | kfree(vmx->guest_msrs); |
3927 | kvm_vcpu_uninit(vcpu); | 3932 | kvm_vcpu_uninit(vcpu); |
@@ -3983,6 +3988,7 @@ free_msrs: | |||
3983 | uninit_vcpu: | 3988 | uninit_vcpu: |
3984 | kvm_vcpu_uninit(&vmx->vcpu); | 3989 | kvm_vcpu_uninit(&vmx->vcpu); |
3985 | free_vcpu: | 3990 | free_vcpu: |
3991 | free_vpid(vmx); | ||
3986 | kmem_cache_free(kvm_vcpu_cache, vmx); | 3992 | kmem_cache_free(kvm_vcpu_cache, vmx); |
3987 | return ERR_PTR(err); | 3993 | return ERR_PTR(err); |
3988 | } | 3994 | } |
@@ -4149,6 +4155,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4149 | .set_idt = vmx_set_idt, | 4155 | .set_idt = vmx_set_idt, |
4150 | .get_gdt = vmx_get_gdt, | 4156 | .get_gdt = vmx_get_gdt, |
4151 | .set_gdt = vmx_set_gdt, | 4157 | .set_gdt = vmx_set_gdt, |
4158 | .set_dr7 = vmx_set_dr7, | ||
4152 | .cache_reg = vmx_cache_reg, | 4159 | .cache_reg = vmx_cache_reg, |
4153 | .get_rflags = vmx_get_rflags, | 4160 | .get_rflags = vmx_get_rflags, |
4154 | .set_rflags = vmx_set_rflags, | 4161 | .set_rflags = vmx_set_rflags, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21b9b6aa3e88..848c814e8c3c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,7 +42,7 @@ | |||
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | 43 | #include <linux/perf_event.h> |
44 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
45 | #undef TRACE_INCLUDE_FILE | 45 | |
46 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
47 | #include "trace.h" | 47 | #include "trace.h" |
48 | 48 | ||
@@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) | |||
224 | kvm_on_user_return(&smsr->urn); | 224 | kvm_on_user_return(&smsr->urn); |
225 | } | 225 | } |
226 | 226 | ||
227 | unsigned long segment_base(u16 selector) | ||
228 | { | ||
229 | struct descriptor_table gdt; | ||
230 | struct desc_struct *d; | ||
231 | unsigned long table_base; | ||
232 | unsigned long v; | ||
233 | |||
234 | if (selector == 0) | ||
235 | return 0; | ||
236 | |||
237 | kvm_get_gdt(&gdt); | ||
238 | table_base = gdt.base; | ||
239 | |||
240 | if (selector & 4) { /* from ldt */ | ||
241 | u16 ldt_selector = kvm_read_ldt(); | ||
242 | |||
243 | table_base = segment_base(ldt_selector); | ||
244 | } | ||
245 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
246 | v = get_desc_base(d); | ||
247 | #ifdef CONFIG_X86_64 | ||
248 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
249 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
250 | #endif | ||
251 | return v; | ||
252 | } | ||
253 | EXPORT_SYMBOL_GPL(segment_base); | ||
254 | |||
255 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 227 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
256 | { | 228 | { |
257 | if (irqchip_in_kernel(vcpu->kvm)) | 229 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -434,8 +406,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
434 | 406 | ||
435 | #ifdef CONFIG_X86_64 | 407 | #ifdef CONFIG_X86_64 |
436 | if (cr0 & 0xffffffff00000000UL) { | 408 | if (cr0 & 0xffffffff00000000UL) { |
437 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | ||
438 | cr0, kvm_read_cr0(vcpu)); | ||
439 | kvm_inject_gp(vcpu, 0); | 409 | kvm_inject_gp(vcpu, 0); |
440 | return; | 410 | return; |
441 | } | 411 | } |
@@ -444,14 +414,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
444 | cr0 &= ~CR0_RESERVED_BITS; | 414 | cr0 &= ~CR0_RESERVED_BITS; |
445 | 415 | ||
446 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 416 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
447 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | ||
448 | kvm_inject_gp(vcpu, 0); | 417 | kvm_inject_gp(vcpu, 0); |
449 | return; | 418 | return; |
450 | } | 419 | } |
451 | 420 | ||
452 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { | 421 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { |
453 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " | ||
454 | "and a clear PE flag\n"); | ||
455 | kvm_inject_gp(vcpu, 0); | 422 | kvm_inject_gp(vcpu, 0); |
456 | return; | 423 | return; |
457 | } | 424 | } |
@@ -462,15 +429,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
462 | int cs_db, cs_l; | 429 | int cs_db, cs_l; |
463 | 430 | ||
464 | if (!is_pae(vcpu)) { | 431 | if (!is_pae(vcpu)) { |
465 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | ||
466 | "in long mode while PAE is disabled\n"); | ||
467 | kvm_inject_gp(vcpu, 0); | 432 | kvm_inject_gp(vcpu, 0); |
468 | return; | 433 | return; |
469 | } | 434 | } |
470 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 435 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
471 | if (cs_l) { | 436 | if (cs_l) { |
472 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | ||
473 | "in long mode while CS.L == 1\n"); | ||
474 | kvm_inject_gp(vcpu, 0); | 437 | kvm_inject_gp(vcpu, 0); |
475 | return; | 438 | return; |
476 | 439 | ||
@@ -478,8 +441,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
478 | } else | 441 | } else |
479 | #endif | 442 | #endif |
480 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 443 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { |
481 | printk(KERN_DEBUG "set_cr0: #GP, pdptrs " | ||
482 | "reserved bits\n"); | ||
483 | kvm_inject_gp(vcpu, 0); | 444 | kvm_inject_gp(vcpu, 0); |
484 | return; | 445 | return; |
485 | } | 446 | } |
@@ -487,7 +448,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
487 | } | 448 | } |
488 | 449 | ||
489 | kvm_x86_ops->set_cr0(vcpu, cr0); | 450 | kvm_x86_ops->set_cr0(vcpu, cr0); |
490 | vcpu->arch.cr0 = cr0; | ||
491 | 451 | ||
492 | kvm_mmu_reset_context(vcpu); | 452 | kvm_mmu_reset_context(vcpu); |
493 | return; | 453 | return; |
@@ -506,34 +466,28 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
506 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | 466 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; |
507 | 467 | ||
508 | if (cr4 & CR4_RESERVED_BITS) { | 468 | if (cr4 & CR4_RESERVED_BITS) { |
509 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); | ||
510 | kvm_inject_gp(vcpu, 0); | 469 | kvm_inject_gp(vcpu, 0); |
511 | return; | 470 | return; |
512 | } | 471 | } |
513 | 472 | ||
514 | if (is_long_mode(vcpu)) { | 473 | if (is_long_mode(vcpu)) { |
515 | if (!(cr4 & X86_CR4_PAE)) { | 474 | if (!(cr4 & X86_CR4_PAE)) { |
516 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " | ||
517 | "in long mode\n"); | ||
518 | kvm_inject_gp(vcpu, 0); | 475 | kvm_inject_gp(vcpu, 0); |
519 | return; | 476 | return; |
520 | } | 477 | } |
521 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 478 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
522 | && ((cr4 ^ old_cr4) & pdptr_bits) | 479 | && ((cr4 ^ old_cr4) & pdptr_bits) |
523 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 480 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { |
524 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); | ||
525 | kvm_inject_gp(vcpu, 0); | 481 | kvm_inject_gp(vcpu, 0); |
526 | return; | 482 | return; |
527 | } | 483 | } |
528 | 484 | ||
529 | if (cr4 & X86_CR4_VMXE) { | 485 | if (cr4 & X86_CR4_VMXE) { |
530 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); | ||
531 | kvm_inject_gp(vcpu, 0); | 486 | kvm_inject_gp(vcpu, 0); |
532 | return; | 487 | return; |
533 | } | 488 | } |
534 | kvm_x86_ops->set_cr4(vcpu, cr4); | 489 | kvm_x86_ops->set_cr4(vcpu, cr4); |
535 | vcpu->arch.cr4 = cr4; | 490 | vcpu->arch.cr4 = cr4; |
536 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | ||
537 | kvm_mmu_reset_context(vcpu); | 491 | kvm_mmu_reset_context(vcpu); |
538 | } | 492 | } |
539 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 493 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -548,21 +502,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
548 | 502 | ||
549 | if (is_long_mode(vcpu)) { | 503 | if (is_long_mode(vcpu)) { |
550 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { | 504 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { |
551 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | ||
552 | kvm_inject_gp(vcpu, 0); | 505 | kvm_inject_gp(vcpu, 0); |
553 | return; | 506 | return; |
554 | } | 507 | } |
555 | } else { | 508 | } else { |
556 | if (is_pae(vcpu)) { | 509 | if (is_pae(vcpu)) { |
557 | if (cr3 & CR3_PAE_RESERVED_BITS) { | 510 | if (cr3 & CR3_PAE_RESERVED_BITS) { |
558 | printk(KERN_DEBUG | ||
559 | "set_cr3: #GP, reserved bits\n"); | ||
560 | kvm_inject_gp(vcpu, 0); | 511 | kvm_inject_gp(vcpu, 0); |
561 | return; | 512 | return; |
562 | } | 513 | } |
563 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { | 514 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { |
564 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " | ||
565 | "reserved bits\n"); | ||
566 | kvm_inject_gp(vcpu, 0); | 515 | kvm_inject_gp(vcpu, 0); |
567 | return; | 516 | return; |
568 | } | 517 | } |
@@ -594,7 +543,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); | |||
594 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 543 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
595 | { | 544 | { |
596 | if (cr8 & CR8_RESERVED_BITS) { | 545 | if (cr8 & CR8_RESERVED_BITS) { |
597 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); | ||
598 | kvm_inject_gp(vcpu, 0); | 546 | kvm_inject_gp(vcpu, 0); |
599 | return; | 547 | return; |
600 | } | 548 | } |
@@ -614,6 +562,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
614 | } | 562 | } |
615 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 563 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
616 | 564 | ||
565 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
566 | { | ||
567 | switch (dr) { | ||
568 | case 0 ... 3: | ||
569 | vcpu->arch.db[dr] = val; | ||
570 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
571 | vcpu->arch.eff_db[dr] = val; | ||
572 | break; | ||
573 | case 4: | ||
574 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
575 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
576 | return 1; | ||
577 | } | ||
578 | /* fall through */ | ||
579 | case 6: | ||
580 | if (val & 0xffffffff00000000ULL) { | ||
581 | kvm_inject_gp(vcpu, 0); | ||
582 | return 1; | ||
583 | } | ||
584 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
585 | break; | ||
586 | case 5: | ||
587 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
588 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
589 | return 1; | ||
590 | } | ||
591 | /* fall through */ | ||
592 | default: /* 7 */ | ||
593 | if (val & 0xffffffff00000000ULL) { | ||
594 | kvm_inject_gp(vcpu, 0); | ||
595 | return 1; | ||
596 | } | ||
597 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
598 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
599 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
600 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
601 | } | ||
602 | break; | ||
603 | } | ||
604 | |||
605 | return 0; | ||
606 | } | ||
607 | EXPORT_SYMBOL_GPL(kvm_set_dr); | ||
608 | |||
609 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | ||
610 | { | ||
611 | switch (dr) { | ||
612 | case 0 ... 3: | ||
613 | *val = vcpu->arch.db[dr]; | ||
614 | break; | ||
615 | case 4: | ||
616 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
617 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
618 | return 1; | ||
619 | } | ||
620 | /* fall through */ | ||
621 | case 6: | ||
622 | *val = vcpu->arch.dr6; | ||
623 | break; | ||
624 | case 5: | ||
625 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
626 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
627 | return 1; | ||
628 | } | ||
629 | /* fall through */ | ||
630 | default: /* 7 */ | ||
631 | *val = vcpu->arch.dr7; | ||
632 | break; | ||
633 | } | ||
634 | |||
635 | return 0; | ||
636 | } | ||
637 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
638 | |||
617 | static inline u32 bit(int bitno) | 639 | static inline u32 bit(int bitno) |
618 | { | 640 | { |
619 | return 1 << (bitno & 31); | 641 | return 1 << (bitno & 31); |
@@ -650,15 +672,12 @@ static u32 emulated_msrs[] = { | |||
650 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 672 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) |
651 | { | 673 | { |
652 | if (efer & efer_reserved_bits) { | 674 | if (efer & efer_reserved_bits) { |
653 | printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", | ||
654 | efer); | ||
655 | kvm_inject_gp(vcpu, 0); | 675 | kvm_inject_gp(vcpu, 0); |
656 | return; | 676 | return; |
657 | } | 677 | } |
658 | 678 | ||
659 | if (is_paging(vcpu) | 679 | if (is_paging(vcpu) |
660 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { | 680 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { |
661 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); | ||
662 | kvm_inject_gp(vcpu, 0); | 681 | kvm_inject_gp(vcpu, 0); |
663 | return; | 682 | return; |
664 | } | 683 | } |
@@ -668,7 +687,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
668 | 687 | ||
669 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 688 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
670 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 689 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { |
671 | printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); | ||
672 | kvm_inject_gp(vcpu, 0); | 690 | kvm_inject_gp(vcpu, 0); |
673 | return; | 691 | return; |
674 | } | 692 | } |
@@ -679,7 +697,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
679 | 697 | ||
680 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 698 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
681 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 699 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { |
682 | printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); | ||
683 | kvm_inject_gp(vcpu, 0); | 700 | kvm_inject_gp(vcpu, 0); |
684 | return; | 701 | return; |
685 | } | 702 | } |
@@ -968,9 +985,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
968 | if (msr >= MSR_IA32_MC0_CTL && | 985 | if (msr >= MSR_IA32_MC0_CTL && |
969 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | 986 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { |
970 | u32 offset = msr - MSR_IA32_MC0_CTL; | 987 | u32 offset = msr - MSR_IA32_MC0_CTL; |
971 | /* only 0 or all 1s can be written to IA32_MCi_CTL */ | 988 | /* only 0 or all 1s can be written to IA32_MCi_CTL |
989 | * some Linux kernels though clear bit 10 in bank 4 to | ||
990 | * workaround a BIOS/GART TBL issue on AMD K8s, ignore | ||
991 | * this to avoid an uncatched #GP in the guest | ||
992 | */ | ||
972 | if ((offset & 0x3) == 0 && | 993 | if ((offset & 0x3) == 0 && |
973 | data != 0 && data != ~(u64)0) | 994 | data != 0 && (data | (1 << 10)) != ~(u64)0) |
974 | return -1; | 995 | return -1; |
975 | vcpu->arch.mce_banks[offset] = data; | 996 | vcpu->arch.mce_banks[offset] = data; |
976 | break; | 997 | break; |
@@ -1114,6 +1135,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1114 | break; | 1135 | break; |
1115 | case MSR_K7_HWCR: | 1136 | case MSR_K7_HWCR: |
1116 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1137 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
1138 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | ||
1117 | if (data != 0) { | 1139 | if (data != 0) { |
1118 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1140 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1119 | data); | 1141 | data); |
@@ -1572,6 +1594,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1572 | case KVM_CAP_HYPERV_VAPIC: | 1594 | case KVM_CAP_HYPERV_VAPIC: |
1573 | case KVM_CAP_HYPERV_SPIN: | 1595 | case KVM_CAP_HYPERV_SPIN: |
1574 | case KVM_CAP_PCI_SEGMENT: | 1596 | case KVM_CAP_PCI_SEGMENT: |
1597 | case KVM_CAP_DEBUGREGS: | ||
1575 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1598 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1576 | r = 1; | 1599 | r = 1; |
1577 | break; | 1600 | break; |
@@ -2124,14 +2147,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2124 | { | 2147 | { |
2125 | vcpu_load(vcpu); | 2148 | vcpu_load(vcpu); |
2126 | 2149 | ||
2127 | events->exception.injected = vcpu->arch.exception.pending; | 2150 | events->exception.injected = |
2151 | vcpu->arch.exception.pending && | ||
2152 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | ||
2128 | events->exception.nr = vcpu->arch.exception.nr; | 2153 | events->exception.nr = vcpu->arch.exception.nr; |
2129 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2154 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
2130 | events->exception.error_code = vcpu->arch.exception.error_code; | 2155 | events->exception.error_code = vcpu->arch.exception.error_code; |
2131 | 2156 | ||
2132 | events->interrupt.injected = vcpu->arch.interrupt.pending; | 2157 | events->interrupt.injected = |
2158 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | ||
2133 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 2159 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
2134 | events->interrupt.soft = vcpu->arch.interrupt.soft; | 2160 | events->interrupt.soft = 0; |
2161 | events->interrupt.shadow = | ||
2162 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
2163 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
2135 | 2164 | ||
2136 | events->nmi.injected = vcpu->arch.nmi_injected; | 2165 | events->nmi.injected = vcpu->arch.nmi_injected; |
2137 | events->nmi.pending = vcpu->arch.nmi_pending; | 2166 | events->nmi.pending = vcpu->arch.nmi_pending; |
@@ -2140,7 +2169,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2140 | events->sipi_vector = vcpu->arch.sipi_vector; | 2169 | events->sipi_vector = vcpu->arch.sipi_vector; |
2141 | 2170 | ||
2142 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2171 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2143 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | 2172 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2173 | | KVM_VCPUEVENT_VALID_SHADOW); | ||
2144 | 2174 | ||
2145 | vcpu_put(vcpu); | 2175 | vcpu_put(vcpu); |
2146 | } | 2176 | } |
@@ -2149,7 +2179,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2149 | struct kvm_vcpu_events *events) | 2179 | struct kvm_vcpu_events *events) |
2150 | { | 2180 | { |
2151 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 2181 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
2152 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | 2182 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2183 | | KVM_VCPUEVENT_VALID_SHADOW)) | ||
2153 | return -EINVAL; | 2184 | return -EINVAL; |
2154 | 2185 | ||
2155 | vcpu_load(vcpu); | 2186 | vcpu_load(vcpu); |
@@ -2164,6 +2195,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2164 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2195 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2165 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | 2196 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) |
2166 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2197 | kvm_pic_clear_isr_ack(vcpu->kvm); |
2198 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | ||
2199 | kvm_x86_ops->set_interrupt_shadow(vcpu, | ||
2200 | events->interrupt.shadow); | ||
2167 | 2201 | ||
2168 | vcpu->arch.nmi_injected = events->nmi.injected; | 2202 | vcpu->arch.nmi_injected = events->nmi.injected; |
2169 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | 2203 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
@@ -2178,6 +2212,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2178 | return 0; | 2212 | return 0; |
2179 | } | 2213 | } |
2180 | 2214 | ||
2215 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | ||
2216 | struct kvm_debugregs *dbgregs) | ||
2217 | { | ||
2218 | vcpu_load(vcpu); | ||
2219 | |||
2220 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | ||
2221 | dbgregs->dr6 = vcpu->arch.dr6; | ||
2222 | dbgregs->dr7 = vcpu->arch.dr7; | ||
2223 | dbgregs->flags = 0; | ||
2224 | |||
2225 | vcpu_put(vcpu); | ||
2226 | } | ||
2227 | |||
2228 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | ||
2229 | struct kvm_debugregs *dbgregs) | ||
2230 | { | ||
2231 | if (dbgregs->flags) | ||
2232 | return -EINVAL; | ||
2233 | |||
2234 | vcpu_load(vcpu); | ||
2235 | |||
2236 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | ||
2237 | vcpu->arch.dr6 = dbgregs->dr6; | ||
2238 | vcpu->arch.dr7 = dbgregs->dr7; | ||
2239 | |||
2240 | vcpu_put(vcpu); | ||
2241 | |||
2242 | return 0; | ||
2243 | } | ||
2244 | |||
2181 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2245 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2182 | unsigned int ioctl, unsigned long arg) | 2246 | unsigned int ioctl, unsigned long arg) |
2183 | { | 2247 | { |
@@ -2356,6 +2420,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2356 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | 2420 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); |
2357 | break; | 2421 | break; |
2358 | } | 2422 | } |
2423 | case KVM_GET_DEBUGREGS: { | ||
2424 | struct kvm_debugregs dbgregs; | ||
2425 | |||
2426 | kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); | ||
2427 | |||
2428 | r = -EFAULT; | ||
2429 | if (copy_to_user(argp, &dbgregs, | ||
2430 | sizeof(struct kvm_debugregs))) | ||
2431 | break; | ||
2432 | r = 0; | ||
2433 | break; | ||
2434 | } | ||
2435 | case KVM_SET_DEBUGREGS: { | ||
2436 | struct kvm_debugregs dbgregs; | ||
2437 | |||
2438 | r = -EFAULT; | ||
2439 | if (copy_from_user(&dbgregs, argp, | ||
2440 | sizeof(struct kvm_debugregs))) | ||
2441 | break; | ||
2442 | |||
2443 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | ||
2444 | break; | ||
2445 | } | ||
2359 | default: | 2446 | default: |
2360 | r = -EINVAL; | 2447 | r = -EINVAL; |
2361 | } | 2448 | } |
@@ -2409,7 +2496,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | |||
2409 | struct kvm_mem_alias *alias; | 2496 | struct kvm_mem_alias *alias; |
2410 | struct kvm_mem_aliases *aliases; | 2497 | struct kvm_mem_aliases *aliases; |
2411 | 2498 | ||
2412 | aliases = rcu_dereference(kvm->arch.aliases); | 2499 | aliases = kvm_aliases(kvm); |
2413 | 2500 | ||
2414 | for (i = 0; i < aliases->naliases; ++i) { | 2501 | for (i = 0; i < aliases->naliases; ++i) { |
2415 | alias = &aliases->aliases[i]; | 2502 | alias = &aliases->aliases[i]; |
@@ -2428,7 +2515,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
2428 | struct kvm_mem_alias *alias; | 2515 | struct kvm_mem_alias *alias; |
2429 | struct kvm_mem_aliases *aliases; | 2516 | struct kvm_mem_aliases *aliases; |
2430 | 2517 | ||
2431 | aliases = rcu_dereference(kvm->arch.aliases); | 2518 | aliases = kvm_aliases(kvm); |
2432 | 2519 | ||
2433 | for (i = 0; i < aliases->naliases; ++i) { | 2520 | for (i = 0; i < aliases->naliases; ++i) { |
2434 | alias = &aliases->aliases[i]; | 2521 | alias = &aliases->aliases[i]; |
@@ -2636,8 +2723,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
2636 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 2723 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
2637 | struct kvm_dirty_log *log) | 2724 | struct kvm_dirty_log *log) |
2638 | { | 2725 | { |
2639 | int r, n, i; | 2726 | int r, i; |
2640 | struct kvm_memory_slot *memslot; | 2727 | struct kvm_memory_slot *memslot; |
2728 | unsigned long n; | ||
2641 | unsigned long is_dirty = 0; | 2729 | unsigned long is_dirty = 0; |
2642 | unsigned long *dirty_bitmap = NULL; | 2730 | unsigned long *dirty_bitmap = NULL; |
2643 | 2731 | ||
@@ -2652,7 +2740,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2652 | if (!memslot->dirty_bitmap) | 2740 | if (!memslot->dirty_bitmap) |
2653 | goto out; | 2741 | goto out; |
2654 | 2742 | ||
2655 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 2743 | n = kvm_dirty_bitmap_bytes(memslot); |
2656 | 2744 | ||
2657 | r = -ENOMEM; | 2745 | r = -ENOMEM; |
2658 | dirty_bitmap = vmalloc(n); | 2746 | dirty_bitmap = vmalloc(n); |
@@ -2822,11 +2910,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2822 | r = -EFAULT; | 2910 | r = -EFAULT; |
2823 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2911 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
2824 | goto out; | 2912 | goto out; |
2913 | r = -ENXIO; | ||
2825 | if (irqchip_in_kernel(kvm)) { | 2914 | if (irqchip_in_kernel(kvm)) { |
2826 | __s32 status; | 2915 | __s32 status; |
2827 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2916 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2828 | irq_event.irq, irq_event.level); | 2917 | irq_event.irq, irq_event.level); |
2829 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2918 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2919 | r = -EFAULT; | ||
2830 | irq_event.status = status; | 2920 | irq_event.status = status; |
2831 | if (copy_to_user(argp, &irq_event, | 2921 | if (copy_to_user(argp, &irq_event, |
2832 | sizeof irq_event)) | 2922 | sizeof irq_event)) |
@@ -3042,6 +3132,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
3042 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3132 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
3043 | } | 3133 | } |
3044 | 3134 | ||
3135 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | ||
3136 | struct kvm_segment *var, int seg) | ||
3137 | { | ||
3138 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
3139 | } | ||
3140 | |||
3141 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
3142 | struct kvm_segment *var, int seg) | ||
3143 | { | ||
3144 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
3145 | } | ||
3146 | |||
3045 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3147 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
3046 | { | 3148 | { |
3047 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3149 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
@@ -3122,14 +3224,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | |||
3122 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3224 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); |
3123 | } | 3225 | } |
3124 | 3226 | ||
3125 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3227 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
3126 | struct kvm_vcpu *vcpu, u32 *error) | 3228 | unsigned int bytes, |
3229 | struct kvm_vcpu *vcpu, | ||
3230 | u32 *error) | ||
3127 | { | 3231 | { |
3128 | void *data = val; | 3232 | void *data = val; |
3129 | int r = X86EMUL_CONTINUE; | 3233 | int r = X86EMUL_CONTINUE; |
3130 | 3234 | ||
3131 | while (bytes) { | 3235 | while (bytes) { |
3132 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); | 3236 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, |
3237 | PFERR_WRITE_MASK, error); | ||
3133 | unsigned offset = addr & (PAGE_SIZE-1); | 3238 | unsigned offset = addr & (PAGE_SIZE-1); |
3134 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3239 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3135 | int ret; | 3240 | int ret; |
@@ -3152,7 +3257,6 @@ out: | |||
3152 | return r; | 3257 | return r; |
3153 | } | 3258 | } |
3154 | 3259 | ||
3155 | |||
3156 | static int emulator_read_emulated(unsigned long addr, | 3260 | static int emulator_read_emulated(unsigned long addr, |
3157 | void *val, | 3261 | void *val, |
3158 | unsigned int bytes, | 3262 | unsigned int bytes, |
@@ -3255,9 +3359,9 @@ mmio: | |||
3255 | } | 3359 | } |
3256 | 3360 | ||
3257 | int emulator_write_emulated(unsigned long addr, | 3361 | int emulator_write_emulated(unsigned long addr, |
3258 | const void *val, | 3362 | const void *val, |
3259 | unsigned int bytes, | 3363 | unsigned int bytes, |
3260 | struct kvm_vcpu *vcpu) | 3364 | struct kvm_vcpu *vcpu) |
3261 | { | 3365 | { |
3262 | /* Crossing a page boundary? */ | 3366 | /* Crossing a page boundary? */ |
3263 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3367 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
@@ -3275,45 +3379,150 @@ int emulator_write_emulated(unsigned long addr, | |||
3275 | } | 3379 | } |
3276 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | 3380 | EXPORT_SYMBOL_GPL(emulator_write_emulated); |
3277 | 3381 | ||
3382 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | ||
3383 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | ||
3384 | |||
3385 | #ifdef CONFIG_X86_64 | ||
3386 | # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) | ||
3387 | #else | ||
3388 | # define CMPXCHG64(ptr, old, new) \ | ||
3389 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | ||
3390 | #endif | ||
3391 | |||
3278 | static int emulator_cmpxchg_emulated(unsigned long addr, | 3392 | static int emulator_cmpxchg_emulated(unsigned long addr, |
3279 | const void *old, | 3393 | const void *old, |
3280 | const void *new, | 3394 | const void *new, |
3281 | unsigned int bytes, | 3395 | unsigned int bytes, |
3282 | struct kvm_vcpu *vcpu) | 3396 | struct kvm_vcpu *vcpu) |
3283 | { | 3397 | { |
3284 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3398 | gpa_t gpa; |
3285 | #ifndef CONFIG_X86_64 | 3399 | struct page *page; |
3286 | /* guests cmpxchg8b have to be emulated atomically */ | 3400 | char *kaddr; |
3287 | if (bytes == 8) { | 3401 | bool exchanged; |
3288 | gpa_t gpa; | ||
3289 | struct page *page; | ||
3290 | char *kaddr; | ||
3291 | u64 val; | ||
3292 | 3402 | ||
3293 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); | 3403 | /* guests cmpxchg8b have to be emulated atomically */ |
3404 | if (bytes > 8 || (bytes & (bytes - 1))) | ||
3405 | goto emul_write; | ||
3294 | 3406 | ||
3295 | if (gpa == UNMAPPED_GVA || | 3407 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
3296 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
3297 | goto emul_write; | ||
3298 | 3408 | ||
3299 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 3409 | if (gpa == UNMAPPED_GVA || |
3300 | goto emul_write; | 3410 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3411 | goto emul_write; | ||
3301 | 3412 | ||
3302 | val = *(u64 *)new; | 3413 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
3414 | goto emul_write; | ||
3303 | 3415 | ||
3304 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3416 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3305 | 3417 | ||
3306 | kaddr = kmap_atomic(page, KM_USER0); | 3418 | kaddr = kmap_atomic(page, KM_USER0); |
3307 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 3419 | kaddr += offset_in_page(gpa); |
3308 | kunmap_atomic(kaddr, KM_USER0); | 3420 | switch (bytes) { |
3309 | kvm_release_page_dirty(page); | 3421 | case 1: |
3422 | exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); | ||
3423 | break; | ||
3424 | case 2: | ||
3425 | exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); | ||
3426 | break; | ||
3427 | case 4: | ||
3428 | exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); | ||
3429 | break; | ||
3430 | case 8: | ||
3431 | exchanged = CMPXCHG64(kaddr, old, new); | ||
3432 | break; | ||
3433 | default: | ||
3434 | BUG(); | ||
3310 | } | 3435 | } |
3436 | kunmap_atomic(kaddr, KM_USER0); | ||
3437 | kvm_release_page_dirty(page); | ||
3438 | |||
3439 | if (!exchanged) | ||
3440 | return X86EMUL_CMPXCHG_FAILED; | ||
3441 | |||
3442 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | ||
3443 | |||
3444 | return X86EMUL_CONTINUE; | ||
3445 | |||
3311 | emul_write: | 3446 | emul_write: |
3312 | #endif | 3447 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3313 | 3448 | ||
3314 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3449 | return emulator_write_emulated(addr, new, bytes, vcpu); |
3315 | } | 3450 | } |
3316 | 3451 | ||
3452 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3453 | { | ||
3454 | /* TODO: String I/O for in kernel device */ | ||
3455 | int r; | ||
3456 | |||
3457 | if (vcpu->arch.pio.in) | ||
3458 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3459 | vcpu->arch.pio.size, pd); | ||
3460 | else | ||
3461 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3462 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3463 | pd); | ||
3464 | return r; | ||
3465 | } | ||
3466 | |||
3467 | |||
3468 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | ||
3469 | unsigned int count, struct kvm_vcpu *vcpu) | ||
3470 | { | ||
3471 | if (vcpu->arch.pio.count) | ||
3472 | goto data_avail; | ||
3473 | |||
3474 | trace_kvm_pio(1, port, size, 1); | ||
3475 | |||
3476 | vcpu->arch.pio.port = port; | ||
3477 | vcpu->arch.pio.in = 1; | ||
3478 | vcpu->arch.pio.count = count; | ||
3479 | vcpu->arch.pio.size = size; | ||
3480 | |||
3481 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3482 | data_avail: | ||
3483 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
3484 | vcpu->arch.pio.count = 0; | ||
3485 | return 1; | ||
3486 | } | ||
3487 | |||
3488 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3489 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | ||
3490 | vcpu->run->io.size = size; | ||
3491 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3492 | vcpu->run->io.count = count; | ||
3493 | vcpu->run->io.port = port; | ||
3494 | |||
3495 | return 0; | ||
3496 | } | ||
3497 | |||
3498 | static int emulator_pio_out_emulated(int size, unsigned short port, | ||
3499 | const void *val, unsigned int count, | ||
3500 | struct kvm_vcpu *vcpu) | ||
3501 | { | ||
3502 | trace_kvm_pio(0, port, size, 1); | ||
3503 | |||
3504 | vcpu->arch.pio.port = port; | ||
3505 | vcpu->arch.pio.in = 0; | ||
3506 | vcpu->arch.pio.count = count; | ||
3507 | vcpu->arch.pio.size = size; | ||
3508 | |||
3509 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
3510 | |||
3511 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3512 | vcpu->arch.pio.count = 0; | ||
3513 | return 1; | ||
3514 | } | ||
3515 | |||
3516 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3517 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
3518 | vcpu->run->io.size = size; | ||
3519 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3520 | vcpu->run->io.count = count; | ||
3521 | vcpu->run->io.port = port; | ||
3522 | |||
3523 | return 0; | ||
3524 | } | ||
3525 | |||
3317 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3526 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
3318 | { | 3527 | { |
3319 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3528 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
@@ -3334,14 +3543,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
3334 | 3543 | ||
3335 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3544 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
3336 | { | 3545 | { |
3337 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); | 3546 | return kvm_get_dr(ctxt->vcpu, dr, dest); |
3338 | } | 3547 | } |
3339 | 3548 | ||
3340 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3549 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
3341 | { | 3550 | { |
3342 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3551 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
3343 | 3552 | ||
3344 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); | 3553 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); |
3345 | } | 3554 | } |
3346 | 3555 | ||
3347 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3556 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3362,12 +3571,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3362 | } | 3571 | } |
3363 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3572 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3364 | 3573 | ||
3574 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
3575 | { | ||
3576 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
3577 | } | ||
3578 | |||
3579 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | ||
3580 | { | ||
3581 | unsigned long value; | ||
3582 | |||
3583 | switch (cr) { | ||
3584 | case 0: | ||
3585 | value = kvm_read_cr0(vcpu); | ||
3586 | break; | ||
3587 | case 2: | ||
3588 | value = vcpu->arch.cr2; | ||
3589 | break; | ||
3590 | case 3: | ||
3591 | value = vcpu->arch.cr3; | ||
3592 | break; | ||
3593 | case 4: | ||
3594 | value = kvm_read_cr4(vcpu); | ||
3595 | break; | ||
3596 | case 8: | ||
3597 | value = kvm_get_cr8(vcpu); | ||
3598 | break; | ||
3599 | default: | ||
3600 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3601 | return 0; | ||
3602 | } | ||
3603 | |||
3604 | return value; | ||
3605 | } | ||
3606 | |||
3607 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | ||
3608 | { | ||
3609 | switch (cr) { | ||
3610 | case 0: | ||
3611 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
3612 | break; | ||
3613 | case 2: | ||
3614 | vcpu->arch.cr2 = val; | ||
3615 | break; | ||
3616 | case 3: | ||
3617 | kvm_set_cr3(vcpu, val); | ||
3618 | break; | ||
3619 | case 4: | ||
3620 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
3621 | break; | ||
3622 | case 8: | ||
3623 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
3624 | break; | ||
3625 | default: | ||
3626 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3627 | } | ||
3628 | } | ||
3629 | |||
3630 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | ||
3631 | { | ||
3632 | return kvm_x86_ops->get_cpl(vcpu); | ||
3633 | } | ||
3634 | |||
3635 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | ||
3636 | { | ||
3637 | kvm_x86_ops->get_gdt(vcpu, dt); | ||
3638 | } | ||
3639 | |||
3640 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | ||
3641 | struct kvm_vcpu *vcpu) | ||
3642 | { | ||
3643 | struct kvm_segment var; | ||
3644 | |||
3645 | kvm_get_segment(vcpu, &var, seg); | ||
3646 | |||
3647 | if (var.unusable) | ||
3648 | return false; | ||
3649 | |||
3650 | if (var.g) | ||
3651 | var.limit >>= 12; | ||
3652 | set_desc_limit(desc, var.limit); | ||
3653 | set_desc_base(desc, (unsigned long)var.base); | ||
3654 | desc->type = var.type; | ||
3655 | desc->s = var.s; | ||
3656 | desc->dpl = var.dpl; | ||
3657 | desc->p = var.present; | ||
3658 | desc->avl = var.avl; | ||
3659 | desc->l = var.l; | ||
3660 | desc->d = var.db; | ||
3661 | desc->g = var.g; | ||
3662 | |||
3663 | return true; | ||
3664 | } | ||
3665 | |||
3666 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | ||
3667 | struct kvm_vcpu *vcpu) | ||
3668 | { | ||
3669 | struct kvm_segment var; | ||
3670 | |||
3671 | /* needed to preserve selector */ | ||
3672 | kvm_get_segment(vcpu, &var, seg); | ||
3673 | |||
3674 | var.base = get_desc_base(desc); | ||
3675 | var.limit = get_desc_limit(desc); | ||
3676 | if (desc->g) | ||
3677 | var.limit = (var.limit << 12) | 0xfff; | ||
3678 | var.type = desc->type; | ||
3679 | var.present = desc->p; | ||
3680 | var.dpl = desc->dpl; | ||
3681 | var.db = desc->d; | ||
3682 | var.s = desc->s; | ||
3683 | var.l = desc->l; | ||
3684 | var.g = desc->g; | ||
3685 | var.avl = desc->avl; | ||
3686 | var.present = desc->p; | ||
3687 | var.unusable = !var.present; | ||
3688 | var.padding = 0; | ||
3689 | |||
3690 | kvm_set_segment(vcpu, &var, seg); | ||
3691 | return; | ||
3692 | } | ||
3693 | |||
3694 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | ||
3695 | { | ||
3696 | struct kvm_segment kvm_seg; | ||
3697 | |||
3698 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3699 | return kvm_seg.selector; | ||
3700 | } | ||
3701 | |||
3702 | static void emulator_set_segment_selector(u16 sel, int seg, | ||
3703 | struct kvm_vcpu *vcpu) | ||
3704 | { | ||
3705 | struct kvm_segment kvm_seg; | ||
3706 | |||
3707 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3708 | kvm_seg.selector = sel; | ||
3709 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
3710 | } | ||
3711 | |||
3712 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
3713 | { | ||
3714 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
3715 | } | ||
3716 | |||
3365 | static struct x86_emulate_ops emulate_ops = { | 3717 | static struct x86_emulate_ops emulate_ops = { |
3366 | .read_std = kvm_read_guest_virt_system, | 3718 | .read_std = kvm_read_guest_virt_system, |
3719 | .write_std = kvm_write_guest_virt_system, | ||
3367 | .fetch = kvm_fetch_guest_virt, | 3720 | .fetch = kvm_fetch_guest_virt, |
3368 | .read_emulated = emulator_read_emulated, | 3721 | .read_emulated = emulator_read_emulated, |
3369 | .write_emulated = emulator_write_emulated, | 3722 | .write_emulated = emulator_write_emulated, |
3370 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3723 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
3724 | .pio_in_emulated = emulator_pio_in_emulated, | ||
3725 | .pio_out_emulated = emulator_pio_out_emulated, | ||
3726 | .get_cached_descriptor = emulator_get_cached_descriptor, | ||
3727 | .set_cached_descriptor = emulator_set_cached_descriptor, | ||
3728 | .get_segment_selector = emulator_get_segment_selector, | ||
3729 | .set_segment_selector = emulator_set_segment_selector, | ||
3730 | .get_gdt = emulator_get_gdt, | ||
3731 | .get_cr = emulator_get_cr, | ||
3732 | .set_cr = emulator_set_cr, | ||
3733 | .cpl = emulator_get_cpl, | ||
3734 | .set_rflags = emulator_set_rflags, | ||
3371 | }; | 3735 | }; |
3372 | 3736 | ||
3373 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3737 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -3398,14 +3762,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3398 | cache_all_regs(vcpu); | 3762 | cache_all_regs(vcpu); |
3399 | 3763 | ||
3400 | vcpu->mmio_is_write = 0; | 3764 | vcpu->mmio_is_write = 0; |
3401 | vcpu->arch.pio.string = 0; | ||
3402 | 3765 | ||
3403 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3766 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3404 | int cs_db, cs_l; | 3767 | int cs_db, cs_l; |
3405 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3768 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
3406 | 3769 | ||
3407 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3770 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3408 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3771 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
3772 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
3409 | vcpu->arch.emulate_ctxt.mode = | 3773 | vcpu->arch.emulate_ctxt.mode = |
3410 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 3774 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
3411 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3775 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
@@ -3414,6 +3778,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3414 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3778 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3415 | 3779 | ||
3416 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3780 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3781 | trace_kvm_emulate_insn_start(vcpu); | ||
3417 | 3782 | ||
3418 | /* Only allow emulation of specific instructions on #UD | 3783 | /* Only allow emulation of specific instructions on #UD |
3419 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 3784 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
@@ -3446,6 +3811,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3446 | ++vcpu->stat.insn_emulation; | 3811 | ++vcpu->stat.insn_emulation; |
3447 | if (r) { | 3812 | if (r) { |
3448 | ++vcpu->stat.insn_emulation_fail; | 3813 | ++vcpu->stat.insn_emulation_fail; |
3814 | trace_kvm_emulate_insn_failed(vcpu); | ||
3449 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3815 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3450 | return EMULATE_DONE; | 3816 | return EMULATE_DONE; |
3451 | return EMULATE_FAIL; | 3817 | return EMULATE_FAIL; |
@@ -3457,16 +3823,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3457 | return EMULATE_DONE; | 3823 | return EMULATE_DONE; |
3458 | } | 3824 | } |
3459 | 3825 | ||
3826 | restart: | ||
3460 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3827 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3461 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | 3828 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; |
3462 | 3829 | ||
3463 | if (r == 0) | 3830 | if (r == 0) |
3464 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 3831 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); |
3465 | 3832 | ||
3466 | if (vcpu->arch.pio.string) | 3833 | if (vcpu->arch.pio.count) { |
3834 | if (!vcpu->arch.pio.in) | ||
3835 | vcpu->arch.pio.count = 0; | ||
3467 | return EMULATE_DO_MMIO; | 3836 | return EMULATE_DO_MMIO; |
3837 | } | ||
3468 | 3838 | ||
3469 | if ((r || vcpu->mmio_is_write) && run) { | 3839 | if (r || vcpu->mmio_is_write) { |
3470 | run->exit_reason = KVM_EXIT_MMIO; | 3840 | run->exit_reason = KVM_EXIT_MMIO; |
3471 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 3841 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
3472 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 3842 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
@@ -3476,222 +3846,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3476 | 3846 | ||
3477 | if (r) { | 3847 | if (r) { |
3478 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3848 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3479 | return EMULATE_DONE; | 3849 | goto done; |
3480 | if (!vcpu->mmio_needed) { | 3850 | if (!vcpu->mmio_needed) { |
3851 | ++vcpu->stat.insn_emulation_fail; | ||
3852 | trace_kvm_emulate_insn_failed(vcpu); | ||
3481 | kvm_report_emulation_failure(vcpu, "mmio"); | 3853 | kvm_report_emulation_failure(vcpu, "mmio"); |
3482 | return EMULATE_FAIL; | 3854 | return EMULATE_FAIL; |
3483 | } | 3855 | } |
3484 | return EMULATE_DO_MMIO; | 3856 | return EMULATE_DO_MMIO; |
3485 | } | 3857 | } |
3486 | 3858 | ||
3487 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
3488 | |||
3489 | if (vcpu->mmio_is_write) { | 3859 | if (vcpu->mmio_is_write) { |
3490 | vcpu->mmio_needed = 0; | 3860 | vcpu->mmio_needed = 0; |
3491 | return EMULATE_DO_MMIO; | 3861 | return EMULATE_DO_MMIO; |
3492 | } | 3862 | } |
3493 | 3863 | ||
3494 | return EMULATE_DONE; | 3864 | done: |
3495 | } | 3865 | if (vcpu->arch.exception.pending) |
3496 | EXPORT_SYMBOL_GPL(emulate_instruction); | 3866 | vcpu->arch.emulate_ctxt.restart = false; |
3497 | |||
3498 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
3499 | { | ||
3500 | void *p = vcpu->arch.pio_data; | ||
3501 | gva_t q = vcpu->arch.pio.guest_gva; | ||
3502 | unsigned bytes; | ||
3503 | int ret; | ||
3504 | u32 error_code; | ||
3505 | |||
3506 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | ||
3507 | if (vcpu->arch.pio.in) | ||
3508 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3509 | else | ||
3510 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3511 | |||
3512 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3513 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3514 | |||
3515 | return ret; | ||
3516 | } | ||
3517 | |||
3518 | int complete_pio(struct kvm_vcpu *vcpu) | ||
3519 | { | ||
3520 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3521 | long delta; | ||
3522 | int r; | ||
3523 | unsigned long val; | ||
3524 | |||
3525 | if (!io->string) { | ||
3526 | if (io->in) { | ||
3527 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3528 | memcpy(&val, vcpu->arch.pio_data, io->size); | ||
3529 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
3530 | } | ||
3531 | } else { | ||
3532 | if (io->in) { | ||
3533 | r = pio_copy_data(vcpu); | ||
3534 | if (r) | ||
3535 | goto out; | ||
3536 | } | ||
3537 | |||
3538 | delta = 1; | ||
3539 | if (io->rep) { | ||
3540 | delta *= io->cur_count; | ||
3541 | /* | ||
3542 | * The size of the register should really depend on | ||
3543 | * current address size. | ||
3544 | */ | ||
3545 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3546 | val -= delta; | ||
3547 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
3548 | } | ||
3549 | if (io->down) | ||
3550 | delta = -delta; | ||
3551 | delta *= io->size; | ||
3552 | if (io->in) { | ||
3553 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
3554 | val += delta; | ||
3555 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); | ||
3556 | } else { | ||
3557 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
3558 | val += delta; | ||
3559 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
3560 | } | ||
3561 | } | ||
3562 | out: | ||
3563 | io->count -= io->cur_count; | ||
3564 | io->cur_count = 0; | ||
3565 | |||
3566 | return 0; | ||
3567 | } | ||
3568 | 3867 | ||
3569 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 3868 | if (vcpu->arch.emulate_ctxt.restart) |
3570 | { | 3869 | goto restart; |
3571 | /* TODO: String I/O for in kernel device */ | ||
3572 | int r; | ||
3573 | 3870 | ||
3574 | if (vcpu->arch.pio.in) | 3871 | return EMULATE_DONE; |
3575 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3576 | vcpu->arch.pio.size, pd); | ||
3577 | else | ||
3578 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3579 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3580 | pd); | ||
3581 | return r; | ||
3582 | } | ||
3583 | |||
3584 | static int pio_string_write(struct kvm_vcpu *vcpu) | ||
3585 | { | ||
3586 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3587 | void *pd = vcpu->arch.pio_data; | ||
3588 | int i, r = 0; | ||
3589 | |||
3590 | for (i = 0; i < io->cur_count; i++) { | ||
3591 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3592 | io->port, io->size, pd)) { | ||
3593 | r = -EOPNOTSUPP; | ||
3594 | break; | ||
3595 | } | ||
3596 | pd += io->size; | ||
3597 | } | ||
3598 | return r; | ||
3599 | } | ||
3600 | |||
3601 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | ||
3602 | { | ||
3603 | unsigned long val; | ||
3604 | |||
3605 | trace_kvm_pio(!in, port, size, 1); | ||
3606 | |||
3607 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3608 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3609 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3610 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3611 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; | ||
3612 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3613 | vcpu->arch.pio.in = in; | ||
3614 | vcpu->arch.pio.string = 0; | ||
3615 | vcpu->arch.pio.down = 0; | ||
3616 | vcpu->arch.pio.rep = 0; | ||
3617 | |||
3618 | if (!vcpu->arch.pio.in) { | ||
3619 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3620 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3621 | } | ||
3622 | |||
3623 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3624 | complete_pio(vcpu); | ||
3625 | return 1; | ||
3626 | } | ||
3627 | return 0; | ||
3628 | } | 3872 | } |
3629 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3873 | EXPORT_SYMBOL_GPL(emulate_instruction); |
3630 | 3874 | ||
3631 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | 3875 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
3632 | int size, unsigned long count, int down, | ||
3633 | gva_t address, int rep, unsigned port) | ||
3634 | { | 3876 | { |
3635 | unsigned now, in_page; | 3877 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3636 | int ret = 0; | 3878 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); |
3637 | 3879 | /* do not return to emulator after return from userspace */ | |
3638 | trace_kvm_pio(!in, port, size, count); | 3880 | vcpu->arch.pio.count = 0; |
3639 | |||
3640 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3641 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3642 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3643 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3644 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; | ||
3645 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3646 | vcpu->arch.pio.in = in; | ||
3647 | vcpu->arch.pio.string = 1; | ||
3648 | vcpu->arch.pio.down = down; | ||
3649 | vcpu->arch.pio.rep = rep; | ||
3650 | |||
3651 | if (!count) { | ||
3652 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3653 | return 1; | ||
3654 | } | ||
3655 | |||
3656 | if (!down) | ||
3657 | in_page = PAGE_SIZE - offset_in_page(address); | ||
3658 | else | ||
3659 | in_page = offset_in_page(address) + size; | ||
3660 | now = min(count, (unsigned long)in_page / size); | ||
3661 | if (!now) | ||
3662 | now = 1; | ||
3663 | if (down) { | ||
3664 | /* | ||
3665 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
3666 | */ | ||
3667 | pr_unimpl(vcpu, "guest string pio down\n"); | ||
3668 | kvm_inject_gp(vcpu, 0); | ||
3669 | return 1; | ||
3670 | } | ||
3671 | vcpu->run->io.count = now; | ||
3672 | vcpu->arch.pio.cur_count = now; | ||
3673 | |||
3674 | if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) | ||
3675 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3676 | |||
3677 | vcpu->arch.pio.guest_gva = address; | ||
3678 | |||
3679 | if (!vcpu->arch.pio.in) { | ||
3680 | /* string PIO write */ | ||
3681 | ret = pio_copy_data(vcpu); | ||
3682 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3683 | return 1; | ||
3684 | if (ret == 0 && !pio_string_write(vcpu)) { | ||
3685 | complete_pio(vcpu); | ||
3686 | if (vcpu->arch.pio.count == 0) | ||
3687 | ret = 1; | ||
3688 | } | ||
3689 | } | ||
3690 | /* no string PIO read support yet */ | ||
3691 | |||
3692 | return ret; | 3881 | return ret; |
3693 | } | 3882 | } |
3694 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | 3883 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
3695 | 3884 | ||
3696 | static void bounce_off(void *info) | 3885 | static void bounce_off(void *info) |
3697 | { | 3886 | { |
@@ -4014,85 +4203,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
4014 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4203 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
4015 | } | 4204 | } |
4016 | 4205 | ||
4017 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
4018 | { | ||
4019 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
4020 | } | ||
4021 | |||
4022 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4206 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
4023 | { | 4207 | { |
4024 | struct descriptor_table dt = { limit, base }; | 4208 | struct desc_ptr dt = { limit, base }; |
4025 | 4209 | ||
4026 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4210 | kvm_x86_ops->set_gdt(vcpu, &dt); |
4027 | } | 4211 | } |
4028 | 4212 | ||
4029 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4213 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
4030 | { | 4214 | { |
4031 | struct descriptor_table dt = { limit, base }; | 4215 | struct desc_ptr dt = { limit, base }; |
4032 | 4216 | ||
4033 | kvm_x86_ops->set_idt(vcpu, &dt); | 4217 | kvm_x86_ops->set_idt(vcpu, &dt); |
4034 | } | 4218 | } |
4035 | 4219 | ||
4036 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
4037 | unsigned long *rflags) | ||
4038 | { | ||
4039 | kvm_lmsw(vcpu, msw); | ||
4040 | *rflags = kvm_get_rflags(vcpu); | ||
4041 | } | ||
4042 | |||
4043 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | ||
4044 | { | ||
4045 | unsigned long value; | ||
4046 | |||
4047 | switch (cr) { | ||
4048 | case 0: | ||
4049 | value = kvm_read_cr0(vcpu); | ||
4050 | break; | ||
4051 | case 2: | ||
4052 | value = vcpu->arch.cr2; | ||
4053 | break; | ||
4054 | case 3: | ||
4055 | value = vcpu->arch.cr3; | ||
4056 | break; | ||
4057 | case 4: | ||
4058 | value = kvm_read_cr4(vcpu); | ||
4059 | break; | ||
4060 | case 8: | ||
4061 | value = kvm_get_cr8(vcpu); | ||
4062 | break; | ||
4063 | default: | ||
4064 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4065 | return 0; | ||
4066 | } | ||
4067 | |||
4068 | return value; | ||
4069 | } | ||
4070 | |||
4071 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | ||
4072 | unsigned long *rflags) | ||
4073 | { | ||
4074 | switch (cr) { | ||
4075 | case 0: | ||
4076 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
4077 | *rflags = kvm_get_rflags(vcpu); | ||
4078 | break; | ||
4079 | case 2: | ||
4080 | vcpu->arch.cr2 = val; | ||
4081 | break; | ||
4082 | case 3: | ||
4083 | kvm_set_cr3(vcpu, val); | ||
4084 | break; | ||
4085 | case 4: | ||
4086 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
4087 | break; | ||
4088 | case 8: | ||
4089 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
4090 | break; | ||
4091 | default: | ||
4092 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4093 | } | ||
4094 | } | ||
4095 | |||
4096 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 4220 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
4097 | { | 4221 | { |
4098 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | 4222 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; |
@@ -4156,9 +4280,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
4156 | { | 4280 | { |
4157 | struct kvm_cpuid_entry2 *best; | 4281 | struct kvm_cpuid_entry2 *best; |
4158 | 4282 | ||
4283 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
4284 | if (!best || best->eax < 0x80000008) | ||
4285 | goto not_found; | ||
4159 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 4286 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
4160 | if (best) | 4287 | if (best) |
4161 | return best->eax & 0xff; | 4288 | return best->eax & 0xff; |
4289 | not_found: | ||
4162 | return 36; | 4290 | return 36; |
4163 | } | 4291 | } |
4164 | 4292 | ||
@@ -4272,6 +4400,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
4272 | { | 4400 | { |
4273 | /* try to reinject previous events if any */ | 4401 | /* try to reinject previous events if any */ |
4274 | if (vcpu->arch.exception.pending) { | 4402 | if (vcpu->arch.exception.pending) { |
4403 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | ||
4404 | vcpu->arch.exception.has_error_code, | ||
4405 | vcpu->arch.exception.error_code); | ||
4275 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 4406 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
4276 | vcpu->arch.exception.has_error_code, | 4407 | vcpu->arch.exception.has_error_code, |
4277 | vcpu->arch.exception.error_code); | 4408 | vcpu->arch.exception.error_code); |
@@ -4532,24 +4663,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4532 | if (!irqchip_in_kernel(vcpu->kvm)) | 4663 | if (!irqchip_in_kernel(vcpu->kvm)) |
4533 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4664 | kvm_set_cr8(vcpu, kvm_run->cr8); |
4534 | 4665 | ||
4535 | if (vcpu->arch.pio.cur_count) { | 4666 | if (vcpu->arch.pio.count || vcpu->mmio_needed || |
4536 | r = complete_pio(vcpu); | 4667 | vcpu->arch.emulate_ctxt.restart) { |
4537 | if (r) | 4668 | if (vcpu->mmio_needed) { |
4538 | goto out; | 4669 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
4539 | } | 4670 | vcpu->mmio_read_completed = 1; |
4540 | if (vcpu->mmio_needed) { | 4671 | vcpu->mmio_needed = 0; |
4541 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 4672 | } |
4542 | vcpu->mmio_read_completed = 1; | ||
4543 | vcpu->mmio_needed = 0; | ||
4544 | |||
4545 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4673 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4546 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4674 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
4547 | EMULTYPE_NO_DECODE); | ||
4548 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4675 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4549 | if (r == EMULATE_DO_MMIO) { | 4676 | if (r == EMULATE_DO_MMIO) { |
4550 | /* | ||
4551 | * Read-modify-write. Back to userspace. | ||
4552 | */ | ||
4553 | r = 0; | 4677 | r = 0; |
4554 | goto out; | 4678 | goto out; |
4555 | } | 4679 | } |
@@ -4632,12 +4756,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4632 | return 0; | 4756 | return 0; |
4633 | } | 4757 | } |
4634 | 4758 | ||
4635 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
4636 | struct kvm_segment *var, int seg) | ||
4637 | { | ||
4638 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
4639 | } | ||
4640 | |||
4641 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 4759 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
4642 | { | 4760 | { |
4643 | struct kvm_segment cs; | 4761 | struct kvm_segment cs; |
@@ -4651,7 +4769,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | |||
4651 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 4769 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
4652 | struct kvm_sregs *sregs) | 4770 | struct kvm_sregs *sregs) |
4653 | { | 4771 | { |
4654 | struct descriptor_table dt; | 4772 | struct desc_ptr dt; |
4655 | 4773 | ||
4656 | vcpu_load(vcpu); | 4774 | vcpu_load(vcpu); |
4657 | 4775 | ||
@@ -4666,11 +4784,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4666 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 4784 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
4667 | 4785 | ||
4668 | kvm_x86_ops->get_idt(vcpu, &dt); | 4786 | kvm_x86_ops->get_idt(vcpu, &dt); |
4669 | sregs->idt.limit = dt.limit; | 4787 | sregs->idt.limit = dt.size; |
4670 | sregs->idt.base = dt.base; | 4788 | sregs->idt.base = dt.address; |
4671 | kvm_x86_ops->get_gdt(vcpu, &dt); | 4789 | kvm_x86_ops->get_gdt(vcpu, &dt); |
4672 | sregs->gdt.limit = dt.limit; | 4790 | sregs->gdt.limit = dt.size; |
4673 | sregs->gdt.base = dt.base; | 4791 | sregs->gdt.base = dt.address; |
4674 | 4792 | ||
4675 | sregs->cr0 = kvm_read_cr0(vcpu); | 4793 | sregs->cr0 = kvm_read_cr0(vcpu); |
4676 | sregs->cr2 = vcpu->arch.cr2; | 4794 | sregs->cr2 = vcpu->arch.cr2; |
@@ -4709,559 +4827,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
4709 | return 0; | 4827 | return 0; |
4710 | } | 4828 | } |
4711 | 4829 | ||
4712 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 4830 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
4713 | struct kvm_segment *var, int seg) | 4831 | bool has_error_code, u32 error_code) |
4714 | { | ||
4715 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
4716 | } | ||
4717 | |||
4718 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
4719 | struct kvm_segment *kvm_desct) | ||
4720 | { | ||
4721 | kvm_desct->base = get_desc_base(seg_desc); | ||
4722 | kvm_desct->limit = get_desc_limit(seg_desc); | ||
4723 | if (seg_desc->g) { | ||
4724 | kvm_desct->limit <<= 12; | ||
4725 | kvm_desct->limit |= 0xfff; | ||
4726 | } | ||
4727 | kvm_desct->selector = selector; | ||
4728 | kvm_desct->type = seg_desc->type; | ||
4729 | kvm_desct->present = seg_desc->p; | ||
4730 | kvm_desct->dpl = seg_desc->dpl; | ||
4731 | kvm_desct->db = seg_desc->d; | ||
4732 | kvm_desct->s = seg_desc->s; | ||
4733 | kvm_desct->l = seg_desc->l; | ||
4734 | kvm_desct->g = seg_desc->g; | ||
4735 | kvm_desct->avl = seg_desc->avl; | ||
4736 | if (!selector) | ||
4737 | kvm_desct->unusable = 1; | ||
4738 | else | ||
4739 | kvm_desct->unusable = 0; | ||
4740 | kvm_desct->padding = 0; | ||
4741 | } | ||
4742 | |||
4743 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, | ||
4744 | u16 selector, | ||
4745 | struct descriptor_table *dtable) | ||
4746 | { | ||
4747 | if (selector & 1 << 2) { | ||
4748 | struct kvm_segment kvm_seg; | ||
4749 | |||
4750 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
4751 | |||
4752 | if (kvm_seg.unusable) | ||
4753 | dtable->limit = 0; | ||
4754 | else | ||
4755 | dtable->limit = kvm_seg.limit; | ||
4756 | dtable->base = kvm_seg.base; | ||
4757 | } | ||
4758 | else | ||
4759 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
4760 | } | ||
4761 | |||
4762 | /* allowed just for 8 bytes segments */ | ||
4763 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4764 | struct desc_struct *seg_desc) | ||
4765 | { | ||
4766 | struct descriptor_table dtable; | ||
4767 | u16 index = selector >> 3; | ||
4768 | int ret; | ||
4769 | u32 err; | ||
4770 | gva_t addr; | ||
4771 | |||
4772 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4773 | |||
4774 | if (dtable.limit < index * 8 + 7) { | ||
4775 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
4776 | return X86EMUL_PROPAGATE_FAULT; | ||
4777 | } | ||
4778 | addr = dtable.base + index * 8; | ||
4779 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4780 | vcpu, &err); | ||
4781 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4782 | kvm_inject_page_fault(vcpu, addr, err); | ||
4783 | |||
4784 | return ret; | ||
4785 | } | ||
4786 | |||
4787 | /* allowed just for 8 bytes segments */ | ||
4788 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4789 | struct desc_struct *seg_desc) | ||
4790 | { | ||
4791 | struct descriptor_table dtable; | ||
4792 | u16 index = selector >> 3; | ||
4793 | |||
4794 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4795 | |||
4796 | if (dtable.limit < index * 8 + 7) | ||
4797 | return 1; | ||
4798 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); | ||
4799 | } | ||
4800 | |||
4801 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4802 | struct desc_struct *seg_desc) | ||
4803 | { | ||
4804 | u32 base_addr = get_desc_base(seg_desc); | ||
4805 | |||
4806 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4807 | } | ||
4808 | |||
4809 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, | ||
4810 | struct desc_struct *seg_desc) | ||
4811 | { | ||
4812 | u32 base_addr = get_desc_base(seg_desc); | ||
4813 | |||
4814 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); | ||
4815 | } | ||
4816 | |||
4817 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
4818 | { | ||
4819 | struct kvm_segment kvm_seg; | ||
4820 | |||
4821 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4822 | return kvm_seg.selector; | ||
4823 | } | ||
4824 | |||
4825 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4826 | { | ||
4827 | struct kvm_segment segvar = { | ||
4828 | .base = selector << 4, | ||
4829 | .limit = 0xffff, | ||
4830 | .selector = selector, | ||
4831 | .type = 3, | ||
4832 | .present = 1, | ||
4833 | .dpl = 3, | ||
4834 | .db = 0, | ||
4835 | .s = 1, | ||
4836 | .l = 0, | ||
4837 | .g = 0, | ||
4838 | .avl = 0, | ||
4839 | .unusable = 0, | ||
4840 | }; | ||
4841 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
4842 | return X86EMUL_CONTINUE; | ||
4843 | } | ||
4844 | |||
4845 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | ||
4846 | { | 4832 | { |
4847 | return (seg != VCPU_SREG_LDTR) && | 4833 | int cs_db, cs_l, ret; |
4848 | (seg != VCPU_SREG_TR) && | 4834 | cache_all_regs(vcpu); |
4849 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | ||
4850 | } | ||
4851 | |||
4852 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4853 | { | ||
4854 | struct kvm_segment kvm_seg; | ||
4855 | struct desc_struct seg_desc; | ||
4856 | u8 dpl, rpl, cpl; | ||
4857 | unsigned err_vec = GP_VECTOR; | ||
4858 | u32 err_code = 0; | ||
4859 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4860 | int ret; | ||
4861 | 4835 | ||
4862 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) | 4836 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4863 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
4864 | 4837 | ||
4865 | /* NULL selector is not valid for TR, CS and SS */ | 4838 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
4866 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 4839 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
4867 | && null_selector) | 4840 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
4868 | goto exception; | 4841 | vcpu->arch.emulate_ctxt.mode = |
4842 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
4843 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
4844 | ? X86EMUL_MODE_VM86 : cs_l | ||
4845 | ? X86EMUL_MODE_PROT64 : cs_db | ||
4846 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
4869 | 4847 | ||
4870 | /* TR should be in GDT only */ | 4848 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
4871 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | 4849 | tss_selector, reason, has_error_code, |
4872 | goto exception; | 4850 | error_code); |
4873 | 4851 | ||
4874 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4875 | if (ret) | 4852 | if (ret) |
4876 | return ret; | 4853 | return EMULATE_FAIL; |
4877 | |||
4878 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4879 | |||
4880 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4881 | kvm_seg.unusable = 1; | ||
4882 | goto load; | ||
4883 | } | ||
4884 | |||
4885 | err_code = selector & 0xfffc; | ||
4886 | err_vec = GP_VECTOR; | ||
4887 | |||
4888 | /* can't load system descriptor into segment selecor */ | ||
4889 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4890 | goto exception; | ||
4891 | |||
4892 | if (!kvm_seg.present) { | ||
4893 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4894 | goto exception; | ||
4895 | } | ||
4896 | |||
4897 | rpl = selector & 3; | ||
4898 | dpl = kvm_seg.dpl; | ||
4899 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4900 | |||
4901 | switch (seg) { | ||
4902 | case VCPU_SREG_SS: | ||
4903 | /* | ||
4904 | * segment is not a writable data segment or segment | ||
4905 | * selector's RPL != CPL or segment selector's RPL != CPL | ||
4906 | */ | ||
4907 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4908 | goto exception; | ||
4909 | break; | ||
4910 | case VCPU_SREG_CS: | ||
4911 | if (!(kvm_seg.type & 8)) | ||
4912 | goto exception; | ||
4913 | |||
4914 | if (kvm_seg.type & 4) { | ||
4915 | /* conforming */ | ||
4916 | if (dpl > cpl) | ||
4917 | goto exception; | ||
4918 | } else { | ||
4919 | /* nonconforming */ | ||
4920 | if (rpl > cpl || dpl != cpl) | ||
4921 | goto exception; | ||
4922 | } | ||
4923 | /* CS(RPL) <- CPL */ | ||
4924 | selector = (selector & 0xfffc) | cpl; | ||
4925 | break; | ||
4926 | case VCPU_SREG_TR: | ||
4927 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4928 | goto exception; | ||
4929 | break; | ||
4930 | case VCPU_SREG_LDTR: | ||
4931 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4932 | goto exception; | ||
4933 | break; | ||
4934 | default: /* DS, ES, FS, or GS */ | ||
4935 | /* | ||
4936 | * segment is not a data or readable code segment or | ||
4937 | * ((segment is a data or nonconforming code segment) | ||
4938 | * and (both RPL and CPL > DPL)) | ||
4939 | */ | ||
4940 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4941 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4942 | goto exception; | ||
4943 | break; | ||
4944 | } | ||
4945 | |||
4946 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4947 | /* mark segment as accessed */ | ||
4948 | kvm_seg.type |= 1; | ||
4949 | seg_desc.type |= 1; | ||
4950 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4951 | } | ||
4952 | load: | ||
4953 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4954 | return X86EMUL_CONTINUE; | ||
4955 | exception: | ||
4956 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4957 | return X86EMUL_PROPAGATE_FAULT; | ||
4958 | } | ||
4959 | |||
4960 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
4961 | struct tss_segment_32 *tss) | ||
4962 | { | ||
4963 | tss->cr3 = vcpu->arch.cr3; | ||
4964 | tss->eip = kvm_rip_read(vcpu); | ||
4965 | tss->eflags = kvm_get_rflags(vcpu); | ||
4966 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
4967 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
4968 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
4969 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
4970 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
4971 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
4972 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
4973 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
4974 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
4975 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4976 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
4977 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
4978 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
4979 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
4980 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
4981 | } | ||
4982 | |||
4983 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4984 | { | ||
4985 | struct kvm_segment kvm_seg; | ||
4986 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4987 | kvm_seg.selector = sel; | ||
4988 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4989 | } | ||
4990 | |||
4991 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
4992 | struct tss_segment_32 *tss) | ||
4993 | { | ||
4994 | kvm_set_cr3(vcpu, tss->cr3); | ||
4995 | |||
4996 | kvm_rip_write(vcpu, tss->eip); | ||
4997 | kvm_set_rflags(vcpu, tss->eflags | 2); | ||
4998 | |||
4999 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | ||
5000 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | ||
5001 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); | ||
5002 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); | ||
5003 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); | ||
5004 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); | ||
5005 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | ||
5006 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | ||
5007 | |||
5008 | /* | ||
5009 | * SDM says that segment selectors are loaded before segment | ||
5010 | * descriptors | ||
5011 | */ | ||
5012 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
5013 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5014 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5015 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5016 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5017 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
5018 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
5019 | |||
5020 | /* | ||
5021 | * Now load segment descriptors. If fault happenes at this stage | ||
5022 | * it is handled in a context of new task | ||
5023 | */ | ||
5024 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
5025 | return 1; | ||
5026 | |||
5027 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5028 | return 1; | ||
5029 | |||
5030 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
5031 | return 1; | ||
5032 | |||
5033 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5034 | return 1; | ||
5035 | |||
5036 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5037 | return 1; | ||
5038 | |||
5039 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) | ||
5040 | return 1; | ||
5041 | |||
5042 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) | ||
5043 | return 1; | ||
5044 | return 0; | ||
5045 | } | ||
5046 | |||
5047 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
5048 | struct tss_segment_16 *tss) | ||
5049 | { | ||
5050 | tss->ip = kvm_rip_read(vcpu); | ||
5051 | tss->flag = kvm_get_rflags(vcpu); | ||
5052 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
5053 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
5054 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
5055 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
5056 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
5057 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
5058 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
5059 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
5060 | |||
5061 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
5062 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
5063 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
5064 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
5065 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
5066 | } | ||
5067 | |||
5068 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
5069 | struct tss_segment_16 *tss) | ||
5070 | { | ||
5071 | kvm_rip_write(vcpu, tss->ip); | ||
5072 | kvm_set_rflags(vcpu, tss->flag | 2); | ||
5073 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | ||
5074 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | ||
5075 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | ||
5076 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); | ||
5077 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); | ||
5078 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); | ||
5079 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | ||
5080 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | ||
5081 | |||
5082 | /* | ||
5083 | * SDM says that segment selectors are loaded before segment | ||
5084 | * descriptors | ||
5085 | */ | ||
5086 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5087 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5088 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5089 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5090 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5091 | |||
5092 | /* | ||
5093 | * Now load segment descriptors. If fault happenes at this stage | ||
5094 | * it is handled in a context of new task | ||
5095 | */ | ||
5096 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
5097 | return 1; | ||
5098 | |||
5099 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5100 | return 1; | ||
5101 | |||
5102 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
5103 | return 1; | ||
5104 | |||
5105 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5106 | return 1; | ||
5107 | |||
5108 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5109 | return 1; | ||
5110 | return 0; | ||
5111 | } | ||
5112 | |||
5113 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5114 | u16 old_tss_sel, u32 old_tss_base, | ||
5115 | struct desc_struct *nseg_desc) | ||
5116 | { | ||
5117 | struct tss_segment_16 tss_segment_16; | ||
5118 | int ret = 0; | ||
5119 | |||
5120 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5121 | sizeof tss_segment_16)) | ||
5122 | goto out; | ||
5123 | |||
5124 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
5125 | |||
5126 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5127 | sizeof tss_segment_16)) | ||
5128 | goto out; | ||
5129 | |||
5130 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5131 | &tss_segment_16, sizeof tss_segment_16)) | ||
5132 | goto out; | ||
5133 | |||
5134 | if (old_tss_sel != 0xffff) { | ||
5135 | tss_segment_16.prev_task_link = old_tss_sel; | ||
5136 | |||
5137 | if (kvm_write_guest(vcpu->kvm, | ||
5138 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5139 | &tss_segment_16.prev_task_link, | ||
5140 | sizeof tss_segment_16.prev_task_link)) | ||
5141 | goto out; | ||
5142 | } | ||
5143 | |||
5144 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
5145 | goto out; | ||
5146 | |||
5147 | ret = 1; | ||
5148 | out: | ||
5149 | return ret; | ||
5150 | } | ||
5151 | |||
5152 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5153 | u16 old_tss_sel, u32 old_tss_base, | ||
5154 | struct desc_struct *nseg_desc) | ||
5155 | { | ||
5156 | struct tss_segment_32 tss_segment_32; | ||
5157 | int ret = 0; | ||
5158 | |||
5159 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5160 | sizeof tss_segment_32)) | ||
5161 | goto out; | ||
5162 | |||
5163 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
5164 | |||
5165 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5166 | sizeof tss_segment_32)) | ||
5167 | goto out; | ||
5168 | |||
5169 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5170 | &tss_segment_32, sizeof tss_segment_32)) | ||
5171 | goto out; | ||
5172 | |||
5173 | if (old_tss_sel != 0xffff) { | ||
5174 | tss_segment_32.prev_task_link = old_tss_sel; | ||
5175 | |||
5176 | if (kvm_write_guest(vcpu->kvm, | ||
5177 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5178 | &tss_segment_32.prev_task_link, | ||
5179 | sizeof tss_segment_32.prev_task_link)) | ||
5180 | goto out; | ||
5181 | } | ||
5182 | |||
5183 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
5184 | goto out; | ||
5185 | |||
5186 | ret = 1; | ||
5187 | out: | ||
5188 | return ret; | ||
5189 | } | ||
5190 | |||
5191 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
5192 | { | ||
5193 | struct kvm_segment tr_seg; | ||
5194 | struct desc_struct cseg_desc; | ||
5195 | struct desc_struct nseg_desc; | ||
5196 | int ret = 0; | ||
5197 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
5198 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
5199 | |||
5200 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); | ||
5201 | |||
5202 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
5203 | * descriptors should generate a pagefault. | ||
5204 | */ | ||
5205 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
5206 | goto out; | ||
5207 | |||
5208 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) | ||
5209 | goto out; | ||
5210 | |||
5211 | if (reason != TASK_SWITCH_IRET) { | ||
5212 | int cpl; | ||
5213 | |||
5214 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
5215 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
5216 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
5217 | return 1; | ||
5218 | } | ||
5219 | } | ||
5220 | 4854 | ||
5221 | if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { | 4855 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5222 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | 4856 | return EMULATE_DONE; |
5223 | return 1; | ||
5224 | } | ||
5225 | |||
5226 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
5227 | cseg_desc.type &= ~(1 << 1); //clear the B flag | ||
5228 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); | ||
5229 | } | ||
5230 | |||
5231 | if (reason == TASK_SWITCH_IRET) { | ||
5232 | u32 eflags = kvm_get_rflags(vcpu); | ||
5233 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
5234 | } | ||
5235 | |||
5236 | /* set back link to prev task only if NT bit is set in eflags | ||
5237 | note that old_tss_sel is not used afetr this point */ | ||
5238 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
5239 | old_tss_sel = 0xffff; | ||
5240 | |||
5241 | if (nseg_desc.type & 8) | ||
5242 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | ||
5243 | old_tss_base, &nseg_desc); | ||
5244 | else | ||
5245 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, | ||
5246 | old_tss_base, &nseg_desc); | ||
5247 | |||
5248 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
5249 | u32 eflags = kvm_get_rflags(vcpu); | ||
5250 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
5251 | } | ||
5252 | |||
5253 | if (reason != TASK_SWITCH_IRET) { | ||
5254 | nseg_desc.type |= (1 << 1); | ||
5255 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
5256 | &nseg_desc); | ||
5257 | } | ||
5258 | |||
5259 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); | ||
5260 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
5261 | tr_seg.type = 11; | ||
5262 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
5263 | out: | ||
5264 | return ret; | ||
5265 | } | 4857 | } |
5266 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 4858 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
5267 | 4859 | ||
@@ -5270,15 +4862,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5270 | { | 4862 | { |
5271 | int mmu_reset_needed = 0; | 4863 | int mmu_reset_needed = 0; |
5272 | int pending_vec, max_bits; | 4864 | int pending_vec, max_bits; |
5273 | struct descriptor_table dt; | 4865 | struct desc_ptr dt; |
5274 | 4866 | ||
5275 | vcpu_load(vcpu); | 4867 | vcpu_load(vcpu); |
5276 | 4868 | ||
5277 | dt.limit = sregs->idt.limit; | 4869 | dt.size = sregs->idt.limit; |
5278 | dt.base = sregs->idt.base; | 4870 | dt.address = sregs->idt.base; |
5279 | kvm_x86_ops->set_idt(vcpu, &dt); | 4871 | kvm_x86_ops->set_idt(vcpu, &dt); |
5280 | dt.limit = sregs->gdt.limit; | 4872 | dt.size = sregs->gdt.limit; |
5281 | dt.base = sregs->gdt.base; | 4873 | dt.address = sregs->gdt.base; |
5282 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4874 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5283 | 4875 | ||
5284 | vcpu->arch.cr2 = sregs->cr2; | 4876 | vcpu->arch.cr2 = sregs->cr2; |
@@ -5377,11 +4969,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5377 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 4969 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
5378 | } | 4970 | } |
5379 | 4971 | ||
5380 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 4972 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5381 | vcpu->arch.singlestep_cs = | 4973 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
5382 | get_segment_selector(vcpu, VCPU_SREG_CS); | 4974 | get_segment_base(vcpu, VCPU_SREG_CS); |
5383 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
5384 | } | ||
5385 | 4975 | ||
5386 | /* | 4976 | /* |
5387 | * Trigger an rflags update that will inject or remove the trace | 4977 | * Trigger an rflags update that will inject or remove the trace |
@@ -5872,13 +5462,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
5872 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5462 | return kvm_x86_ops->interrupt_allowed(vcpu); |
5873 | } | 5463 | } |
5874 | 5464 | ||
5465 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) | ||
5466 | { | ||
5467 | unsigned long current_rip = kvm_rip_read(vcpu) + | ||
5468 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5469 | |||
5470 | return current_rip == linear_rip; | ||
5471 | } | ||
5472 | EXPORT_SYMBOL_GPL(kvm_is_linear_rip); | ||
5473 | |||
5875 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | 5474 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) |
5876 | { | 5475 | { |
5877 | unsigned long rflags; | 5476 | unsigned long rflags; |
5878 | 5477 | ||
5879 | rflags = kvm_x86_ops->get_rflags(vcpu); | 5478 | rflags = kvm_x86_ops->get_rflags(vcpu); |
5880 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5479 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5881 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | 5480 | rflags &= ~X86_EFLAGS_TF; |
5882 | return rflags; | 5481 | return rflags; |
5883 | } | 5482 | } |
5884 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 5483 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
@@ -5886,10 +5485,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); | |||
5886 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 5485 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
5887 | { | 5486 | { |
5888 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 5487 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
5889 | vcpu->arch.singlestep_cs == | 5488 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
5890 | get_segment_selector(vcpu, VCPU_SREG_CS) && | 5489 | rflags |= X86_EFLAGS_TF; |
5891 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5892 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5893 | kvm_x86_ops->set_rflags(vcpu, rflags); | 5490 | kvm_x86_ops->set_rflags(vcpu, rflags); |
5894 | } | 5491 | } |
5895 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 5492 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
@@ -5905,3 +5502,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | |||
5905 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | 5502 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); |
5906 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 5503 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
5907 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 5504 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
5505 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b7a404722d2b..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
69 | { | ||
70 | return rcu_dereference_check(kvm->arch.aliases, | ||
71 | srcu_read_lock_held(&kvm->srcu) | ||
72 | || lockdep_is_held(&kvm->slots_lock)); | ||
73 | } | ||
74 | |||
68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
70 | 77 | ||