Diffstat (limited to 'arch/x86/kvm')

 arch/x86/kvm/emulate.c     | 673
 arch/x86/kvm/i8254.c       |   1
 arch/x86/kvm/i8259.c       |   2
 arch/x86/kvm/irq.c         |  74
 arch/x86/kvm/lapic.c       | 140
 arch/x86/kvm/lapic.h       |  34
 arch/x86/kvm/mmu.c         | 194
 arch/x86/kvm/mmutrace.h    |   6
 arch/x86/kvm/paging_tmpl.h | 106
 arch/x86/kvm/svm.c         |  24
 arch/x86/kvm/vmx.c         | 714
 arch/x86/kvm/x86.c         | 208

 12 files changed, 1401 insertions(+), 775 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a27e76371108..a335cc6cde72 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
| 25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 26 | #include <asm/kvm_emulate.h> | 26 | #include <asm/kvm_emulate.h> |
| 27 | #include <linux/stringify.h> | ||
| 27 | 28 | ||
| 28 | #include "x86.h" | 29 | #include "x86.h" |
| 29 | #include "tss.h" | 30 | #include "tss.h" |
| @@ -43,7 +44,7 @@ | |||
| 43 | #define OpCL 9ull /* CL register (for shifts) */ | 44 | #define OpCL 9ull /* CL register (for shifts) */ |
| 44 | #define OpImmByte 10ull /* 8-bit sign extended immediate */ | 45 | #define OpImmByte 10ull /* 8-bit sign extended immediate */ |
| 45 | #define OpOne 11ull /* Implied 1 */ | 46 | #define OpOne 11ull /* Implied 1 */ |
| 46 | #define OpImm 12ull /* Sign extended immediate */ | 47 | #define OpImm 12ull /* Sign extended up to 32-bit immediate */ |
| 47 | #define OpMem16 13ull /* Memory operand (16-bit). */ | 48 | #define OpMem16 13ull /* Memory operand (16-bit). */ |
| 48 | #define OpMem32 14ull /* Memory operand (32-bit). */ | 49 | #define OpMem32 14ull /* Memory operand (32-bit). */ |
| 49 | #define OpImmU 15ull /* Immediate operand, zero extended */ | 50 | #define OpImmU 15ull /* Immediate operand, zero extended */ |
| @@ -58,6 +59,7 @@ | |||
| 58 | #define OpFS 24ull /* FS */ | 59 | #define OpFS 24ull /* FS */ |
| 59 | #define OpGS 25ull /* GS */ | 60 | #define OpGS 25ull /* GS */ |
| 60 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ | 61 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ |
| 62 | #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ | ||
| 61 | 63 | ||
| 62 | #define OpBits 5 /* Width of operand field */ | 64 | #define OpBits 5 /* Width of operand field */ |
| 63 | #define OpMask ((1ull << OpBits) - 1) | 65 | #define OpMask ((1ull << OpBits) - 1) |
| @@ -101,6 +103,7 @@ | |||
| 101 | #define SrcMemFAddr (OpMemFAddr << SrcShift) | 103 | #define SrcMemFAddr (OpMemFAddr << SrcShift) |
| 102 | #define SrcAcc (OpAcc << SrcShift) | 104 | #define SrcAcc (OpAcc << SrcShift) |
| 103 | #define SrcImmU16 (OpImmU16 << SrcShift) | 105 | #define SrcImmU16 (OpImmU16 << SrcShift) |
| 106 | #define SrcImm64 (OpImm64 << SrcShift) | ||
| 104 | #define SrcDX (OpDX << SrcShift) | 107 | #define SrcDX (OpDX << SrcShift) |
| 105 | #define SrcMem8 (OpMem8 << SrcShift) | 108 | #define SrcMem8 (OpMem8 << SrcShift) |
| 106 | #define SrcMask (OpMask << SrcShift) | 109 | #define SrcMask (OpMask << SrcShift) |
| @@ -113,6 +116,7 @@ | |||
| 113 | #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ | 116 | #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ |
| 114 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ | 117 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ |
| 115 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ | 118 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ |
| 119 | #define Escape (5<<15) /* Escape to coprocessor instruction */ | ||
| 116 | #define Sse (1<<18) /* SSE Vector instruction */ | 120 | #define Sse (1<<18) /* SSE Vector instruction */ |
| 117 | /* Generic ModRM decode. */ | 121 | /* Generic ModRM decode. */ |
| 118 | #define ModRM (1<<19) | 122 | #define ModRM (1<<19) |
| @@ -146,6 +150,8 @@ | |||
| 146 | #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ | 150 | #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ |
| 147 | #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ | 151 | #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ |
| 148 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ | 152 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ |
| 153 | #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ | ||
| 154 | #define NoWrite ((u64)1 << 45) /* No writeback */ | ||
| 149 | 155 | ||
| 150 | #define X2(x...) x, x | 156 | #define X2(x...) x, x |
| 151 | #define X3(x...) X2(x), x | 157 | #define X3(x...) X2(x), x |
| @@ -156,6 +162,27 @@ | |||
| 156 | #define X8(x...) X4(x), X4(x) | 162 | #define X8(x...) X4(x), X4(x) |
| 157 | #define X16(x...) X8(x), X8(x) | 163 | #define X16(x...) X8(x), X8(x) |
| 158 | 164 | ||
| 165 | #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) | ||
| 166 | #define FASTOP_SIZE 8 | ||
| 167 | |||
| 168 | /* | ||
| 169 | * fastop functions have a special calling convention: | ||
| 170 | * | ||
| 171 | * dst: [rdx]:rax (in/out) | ||
| 172 | * src: rbx (in/out) | ||
| 173 | * src2: rcx (in) | ||
| 174 | * flags: rflags (in/out) | ||
| 175 | * | ||
| 176 | * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for | ||
| 177 | * different operand sizes can be reached by calculation, rather than a jump | ||
| 178 | * table (which would be bigger than the code). | ||
| 179 | * | ||
| 180 | * fastop functions are declared as taking a never-defined fastop parameter, | ||
| 181 | * so they can't be called from C directly. | ||
| 182 | */ | ||
| 183 | |||
| 184 | struct fastop; | ||
| 185 | |||
| 159 | struct opcode { | 186 | struct opcode { |
| 160 | u64 flags : 56; | 187 | u64 flags : 56; |
| 161 | u64 intercept : 8; | 188 | u64 intercept : 8; |
| @@ -164,6 +191,8 @@ struct opcode { | |||
| 164 | const struct opcode *group; | 191 | const struct opcode *group; |
| 165 | const struct group_dual *gdual; | 192 | const struct group_dual *gdual; |
| 166 | const struct gprefix *gprefix; | 193 | const struct gprefix *gprefix; |
| 194 | const struct escape *esc; | ||
| 195 | void (*fastop)(struct fastop *fake); | ||
| 167 | } u; | 196 | } u; |
| 168 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 197 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
| 169 | }; | 198 | }; |
| @@ -180,6 +209,11 @@ struct gprefix { | |||
| 180 | struct opcode pfx_f3; | 209 | struct opcode pfx_f3; |
| 181 | }; | 210 | }; |
| 182 | 211 | ||
| 212 | struct escape { | ||
| 213 | struct opcode op[8]; | ||
| 214 | struct opcode high[64]; | ||
| 215 | }; | ||
| 216 | |||
| 183 | /* EFLAGS bit definitions. */ | 217 | /* EFLAGS bit definitions. */ |
| 184 | #define EFLG_ID (1<<21) | 218 | #define EFLG_ID (1<<21) |
| 185 | #define EFLG_VIP (1<<20) | 219 | #define EFLG_VIP (1<<20) |
| @@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) | |||
| 407 | } \ | 441 | } \ |
| 408 | } while (0) | 442 | } while (0) |
| 409 | 443 | ||
| 444 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); | ||
| 445 | |||
| 446 | #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" | ||
| 447 | #define FOP_RET "ret \n\t" | ||
| 448 | |||
| 449 | #define FOP_START(op) \ | ||
| 450 | extern void em_##op(struct fastop *fake); \ | ||
| 451 | asm(".pushsection .text, \"ax\" \n\t" \ | ||
| 452 | ".global em_" #op " \n\t" \ | ||
| 453 | FOP_ALIGN \ | ||
| 454 | "em_" #op ": \n\t" | ||
| 455 | |||
| 456 | #define FOP_END \ | ||
| 457 | ".popsection") | ||
| 458 | |||
| 459 | #define FOPNOP() FOP_ALIGN FOP_RET | ||
| 460 | |||
| 461 | #define FOP1E(op, dst) \ | ||
| 462 | FOP_ALIGN #op " %" #dst " \n\t" FOP_RET | ||
| 463 | |||
| 464 | #define FASTOP1(op) \ | ||
| 465 | FOP_START(op) \ | ||
| 466 | FOP1E(op##b, al) \ | ||
| 467 | FOP1E(op##w, ax) \ | ||
| 468 | FOP1E(op##l, eax) \ | ||
| 469 | ON64(FOP1E(op##q, rax)) \ | ||
| 470 | FOP_END | ||
| 471 | |||
| 472 | #define FOP2E(op, dst, src) \ | ||
| 473 | FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET | ||
| 474 | |||
| 475 | #define FASTOP2(op) \ | ||
| 476 | FOP_START(op) \ | ||
| 477 | FOP2E(op##b, al, bl) \ | ||
| 478 | FOP2E(op##w, ax, bx) \ | ||
| 479 | FOP2E(op##l, eax, ebx) \ | ||
| 480 | ON64(FOP2E(op##q, rax, rbx)) \ | ||
| 481 | FOP_END | ||
| 482 | |||
| 483 | /* 2 operand, word only */ | ||
| 484 | #define FASTOP2W(op) \ | ||
| 485 | FOP_START(op) \ | ||
| 486 | FOPNOP() \ | ||
| 487 | FOP2E(op##w, ax, bx) \ | ||
| 488 | FOP2E(op##l, eax, ebx) \ | ||
| 489 | ON64(FOP2E(op##q, rax, rbx)) \ | ||
| 490 | FOP_END | ||
| 491 | |||
| 492 | /* 2 operand, src is CL */ | ||
| 493 | #define FASTOP2CL(op) \ | ||
| 494 | FOP_START(op) \ | ||
| 495 | FOP2E(op##b, al, cl) \ | ||
| 496 | FOP2E(op##w, ax, cl) \ | ||
| 497 | FOP2E(op##l, eax, cl) \ | ||
| 498 | ON64(FOP2E(op##q, rax, cl)) \ | ||
| 499 | FOP_END | ||
| 500 | |||
| 501 | #define FOP3E(op, dst, src, src2) \ | ||
| 502 | FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET | ||
| 503 | |||
| 504 | /* 3-operand, word-only, src2=cl */ | ||
| 505 | #define FASTOP3WCL(op) \ | ||
| 506 | FOP_START(op) \ | ||
| 507 | FOPNOP() \ | ||
| 508 | FOP3E(op##w, ax, bx, cl) \ | ||
| 509 | FOP3E(op##l, eax, ebx, cl) \ | ||
| 510 | ON64(FOP3E(op##q, rax, rbx, cl)) \ | ||
| 511 | FOP_END | ||
| 512 | |||
| 513 | /* Special case for SETcc - 1 instruction per cc */ | ||
| 514 | #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" | ||
| 515 | |||
| 516 | FOP_START(setcc) | ||
| 517 | FOP_SETCC(seto) | ||
| 518 | FOP_SETCC(setno) | ||
| 519 | FOP_SETCC(setc) | ||
| 520 | FOP_SETCC(setnc) | ||
| 521 | FOP_SETCC(setz) | ||
| 522 | FOP_SETCC(setnz) | ||
| 523 | FOP_SETCC(setbe) | ||
| 524 | FOP_SETCC(setnbe) | ||
| 525 | FOP_SETCC(sets) | ||
| 526 | FOP_SETCC(setns) | ||
| 527 | FOP_SETCC(setp) | ||
| 528 | FOP_SETCC(setnp) | ||
| 529 | FOP_SETCC(setl) | ||
| 530 | FOP_SETCC(setnl) | ||
| 531 | FOP_SETCC(setle) | ||
| 532 | FOP_SETCC(setnle) | ||
| 533 | FOP_END; | ||
| 534 | |||
| 410 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ | 535 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ |
| 411 | do { \ | 536 | do { \ |
| 412 | unsigned long _tmp; \ | 537 | unsigned long _tmp; \ |
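To make the fastop calling convention above concrete, here is roughly what FASTOP2(add) asks the assembler to emit — a hand-written sketch derived from the FOP_START/FOP2E/FOP_END macros, not text taken from the patch itself:

    .pushsection .text, "ax"
    .global em_add
    .align 8
    em_add:             /* 1-byte form, offset 0 */
        addb %bl, %al
        ret
    .align 8            /* 2-byte form, offset 8 */
        addw %bx, %ax
        ret
    .align 8            /* 4-byte form, offset 16 */
        addl %ebx, %eax
        ret
    .align 8            /* 8-byte form, offset 24 (CONFIG_X86_64 only) */
        addq %rbx, %rax
        ret
    .popsection

fastop(), added further down in this patch, reaches the right entry with fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE, i.e. offsets 0, 8, 16 and 24 for 1-, 2-, 4- and 8-byte destinations, so no per-size jump table is needed.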
| @@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
| 663 | ulong la; | 788 | ulong la; |
| 664 | u32 lim; | 789 | u32 lim; |
| 665 | u16 sel; | 790 | u16 sel; |
| 666 | unsigned cpl, rpl; | 791 | unsigned cpl; |
| 667 | 792 | ||
| 668 | la = seg_base(ctxt, addr.seg) + addr.ea; | 793 | la = seg_base(ctxt, addr.seg) + addr.ea; |
| 669 | switch (ctxt->mode) { | 794 | switch (ctxt->mode) { |
| @@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
| 697 | goto bad; | 822 | goto bad; |
| 698 | } | 823 | } |
| 699 | cpl = ctxt->ops->cpl(ctxt); | 824 | cpl = ctxt->ops->cpl(ctxt); |
| 700 | if (ctxt->mode == X86EMUL_MODE_REAL) | ||
| 701 | rpl = 0; | ||
| 702 | else | ||
| 703 | rpl = sel & 3; | ||
| 704 | cpl = max(cpl, rpl); | ||
| 705 | if (!(desc.type & 8)) { | 825 | if (!(desc.type & 8)) { |
| 706 | /* data segment */ | 826 | /* data segment */ |
| 707 | if (cpl > desc.dpl) | 827 | if (cpl > desc.dpl) |
| @@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 852 | return rc; | 972 | return rc; |
| 853 | } | 973 | } |
| 854 | 974 | ||
| 855 | static int test_cc(unsigned int condition, unsigned int flags) | 975 | FASTOP2(add); |
| 856 | { | 976 | FASTOP2(or); |
| 857 | int rc = 0; | 977 | FASTOP2(adc); |
| 858 | 978 | FASTOP2(sbb); | |
| 859 | switch ((condition & 15) >> 1) { | 979 | FASTOP2(and); |
| 860 | case 0: /* o */ | 980 | FASTOP2(sub); |
| 861 | rc |= (flags & EFLG_OF); | 981 | FASTOP2(xor); |
| 862 | break; | 982 | FASTOP2(cmp); |
| 863 | case 1: /* b/c/nae */ | 983 | FASTOP2(test); |
| 864 | rc |= (flags & EFLG_CF); | 984 | |
| 865 | break; | 985 | FASTOP3WCL(shld); |
| 866 | case 2: /* z/e */ | 986 | FASTOP3WCL(shrd); |
| 867 | rc |= (flags & EFLG_ZF); | 987 | |
| 868 | break; | 988 | FASTOP2W(imul); |
| 869 | case 3: /* be/na */ | 989 | |
| 870 | rc |= (flags & (EFLG_CF|EFLG_ZF)); | 990 | FASTOP1(not); |
| 871 | break; | 991 | FASTOP1(neg); |
| 872 | case 4: /* s */ | 992 | FASTOP1(inc); |
| 873 | rc |= (flags & EFLG_SF); | 993 | FASTOP1(dec); |
| 874 | break; | 994 | |
| 875 | case 5: /* p/pe */ | 995 | FASTOP2CL(rol); |
| 876 | rc |= (flags & EFLG_PF); | 996 | FASTOP2CL(ror); |
| 877 | break; | 997 | FASTOP2CL(rcl); |
| 878 | case 7: /* le/ng */ | 998 | FASTOP2CL(rcr); |
| 879 | rc |= (flags & EFLG_ZF); | 999 | FASTOP2CL(shl); |
| 880 | /* fall through */ | 1000 | FASTOP2CL(shr); |
| 881 | case 6: /* l/nge */ | 1001 | FASTOP2CL(sar); |
| 882 | rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); | 1002 | |
| 883 | break; | 1003 | FASTOP2W(bsf); |
| 884 | } | 1004 | FASTOP2W(bsr); |
| 885 | 1005 | FASTOP2W(bt); | |
| 886 | /* Odd condition identifiers (lsb == 1) have inverted sense. */ | 1006 | FASTOP2W(bts); |
| 887 | return (!!rc ^ (condition & 1)); | 1007 | FASTOP2W(btr); |
| 1008 | FASTOP2W(btc); | ||
| 1009 | |||
| 1010 | static u8 test_cc(unsigned int condition, unsigned long flags) | ||
| 1011 | { | ||
| 1012 | u8 rc; | ||
| 1013 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); | ||
| 1014 | |||
| 1015 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
| 1016 | asm("push %[flags]; popf; call *%[fastop]" | ||
| 1017 | : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); | ||
| 1018 | return rc; | ||
| 888 | } | 1019 | } |
| 889 | 1020 | ||
| 890 | static void fetch_register_operand(struct operand *op) | 1021 | static void fetch_register_operand(struct operand *op) |
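The new test_cc() relies on the em_setcc table emitted just above: each FOP_SETCC() stub is a setcc %al (3 bytes) followed by ret (1 byte), and ".align 4" pads every stub to exactly 4 bytes, so the stub for condition code N lives at em_setcc + 4 * N, N being the low nibble of the SETcc/Jcc opcode. A hypothetical helper, not part of the patch, just to spell out the address arithmetic:

    /* Illustration only: locate the 4-byte SETcc stub for a condition code. */
    static inline void *setcc_stub(u8 condition)
    {
    	/* 16 stubs, 4 bytes each: seto, setno, setc, setnc, ..., setnle */
    	return (void *)em_setcc + 4 * (condition & 0xf);
    }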
| @@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) | |||
| 994 | ctxt->ops->put_fpu(ctxt); | 1125 | ctxt->ops->put_fpu(ctxt); |
| 995 | } | 1126 | } |
| 996 | 1127 | ||
| 1128 | static int em_fninit(struct x86_emulate_ctxt *ctxt) | ||
| 1129 | { | ||
| 1130 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
| 1131 | return emulate_nm(ctxt); | ||
| 1132 | |||
| 1133 | ctxt->ops->get_fpu(ctxt); | ||
| 1134 | asm volatile("fninit"); | ||
| 1135 | ctxt->ops->put_fpu(ctxt); | ||
| 1136 | return X86EMUL_CONTINUE; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | static int em_fnstcw(struct x86_emulate_ctxt *ctxt) | ||
| 1140 | { | ||
| 1141 | u16 fcw; | ||
| 1142 | |||
| 1143 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
| 1144 | return emulate_nm(ctxt); | ||
| 1145 | |||
| 1146 | ctxt->ops->get_fpu(ctxt); | ||
| 1147 | asm volatile("fnstcw %0": "+m"(fcw)); | ||
| 1148 | ctxt->ops->put_fpu(ctxt); | ||
| 1149 | |||
| 1150 | /* force 2 byte destination */ | ||
| 1151 | ctxt->dst.bytes = 2; | ||
| 1152 | ctxt->dst.val = fcw; | ||
| 1153 | |||
| 1154 | return X86EMUL_CONTINUE; | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | static int em_fnstsw(struct x86_emulate_ctxt *ctxt) | ||
| 1158 | { | ||
| 1159 | u16 fsw; | ||
| 1160 | |||
| 1161 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
| 1162 | return emulate_nm(ctxt); | ||
| 1163 | |||
| 1164 | ctxt->ops->get_fpu(ctxt); | ||
| 1165 | asm volatile("fnstsw %0": "+m"(fsw)); | ||
| 1166 | ctxt->ops->put_fpu(ctxt); | ||
| 1167 | |||
| 1168 | /* force 2 byte destination */ | ||
| 1169 | ctxt->dst.bytes = 2; | ||
| 1170 | ctxt->dst.val = fsw; | ||
| 1171 | |||
| 1172 | return X86EMUL_CONTINUE; | ||
| 1173 | } | ||
| 1174 | |||
| 997 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | 1175 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, |
| 998 | struct operand *op) | 1176 | struct operand *op) |
| 999 | { | 1177 | { |
| @@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
| 1534 | { | 1712 | { |
| 1535 | int rc; | 1713 | int rc; |
| 1536 | 1714 | ||
| 1715 | if (ctxt->d & NoWrite) | ||
| 1716 | return X86EMUL_CONTINUE; | ||
| 1717 | |||
| 1537 | switch (ctxt->dst.type) { | 1718 | switch (ctxt->dst.type) { |
| 1538 | case OP_REG: | 1719 | case OP_REG: |
| 1539 | write_register_operand(&ctxt->dst); | 1720 | write_register_operand(&ctxt->dst); |
| @@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
| 1918 | return X86EMUL_CONTINUE; | 2099 | return X86EMUL_CONTINUE; |
| 1919 | } | 2100 | } |
| 1920 | 2101 | ||
| 1921 | static int em_grp2(struct x86_emulate_ctxt *ctxt) | ||
| 1922 | { | ||
| 1923 | switch (ctxt->modrm_reg) { | ||
| 1924 | case 0: /* rol */ | ||
| 1925 | emulate_2op_SrcB(ctxt, "rol"); | ||
| 1926 | break; | ||
| 1927 | case 1: /* ror */ | ||
| 1928 | emulate_2op_SrcB(ctxt, "ror"); | ||
| 1929 | break; | ||
| 1930 | case 2: /* rcl */ | ||
| 1931 | emulate_2op_SrcB(ctxt, "rcl"); | ||
| 1932 | break; | ||
| 1933 | case 3: /* rcr */ | ||
| 1934 | emulate_2op_SrcB(ctxt, "rcr"); | ||
| 1935 | break; | ||
| 1936 | case 4: /* sal/shl */ | ||
| 1937 | case 6: /* sal/shl */ | ||
| 1938 | emulate_2op_SrcB(ctxt, "sal"); | ||
| 1939 | break; | ||
| 1940 | case 5: /* shr */ | ||
| 1941 | emulate_2op_SrcB(ctxt, "shr"); | ||
| 1942 | break; | ||
| 1943 | case 7: /* sar */ | ||
| 1944 | emulate_2op_SrcB(ctxt, "sar"); | ||
| 1945 | break; | ||
| 1946 | } | ||
| 1947 | return X86EMUL_CONTINUE; | ||
| 1948 | } | ||
| 1949 | |||
| 1950 | static int em_not(struct x86_emulate_ctxt *ctxt) | ||
| 1951 | { | ||
| 1952 | ctxt->dst.val = ~ctxt->dst.val; | ||
| 1953 | return X86EMUL_CONTINUE; | ||
| 1954 | } | ||
| 1955 | |||
| 1956 | static int em_neg(struct x86_emulate_ctxt *ctxt) | ||
| 1957 | { | ||
| 1958 | emulate_1op(ctxt, "neg"); | ||
| 1959 | return X86EMUL_CONTINUE; | ||
| 1960 | } | ||
| 1961 | |||
| 1962 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) | 2102 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) |
| 1963 | { | 2103 | { |
| 1964 | u8 ex = 0; | 2104 | u8 ex = 0; |
| @@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
| 2000 | int rc = X86EMUL_CONTINUE; | 2140 | int rc = X86EMUL_CONTINUE; |
| 2001 | 2141 | ||
| 2002 | switch (ctxt->modrm_reg) { | 2142 | switch (ctxt->modrm_reg) { |
| 2003 | case 0: /* inc */ | ||
| 2004 | emulate_1op(ctxt, "inc"); | ||
| 2005 | break; | ||
| 2006 | case 1: /* dec */ | ||
| 2007 | emulate_1op(ctxt, "dec"); | ||
| 2008 | break; | ||
| 2009 | case 2: /* call near abs */ { | 2143 | case 2: /* call near abs */ { |
| 2010 | long int old_eip; | 2144 | long int old_eip; |
| 2011 | old_eip = ctxt->_eip; | 2145 | old_eip = ctxt->_eip; |
| @@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | |||
| 2075 | /* Save real source value, then compare EAX against destination. */ | 2209 | /* Save real source value, then compare EAX against destination. */ |
| 2076 | ctxt->src.orig_val = ctxt->src.val; | 2210 | ctxt->src.orig_val = ctxt->src.val; |
| 2077 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); | 2211 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); |
| 2078 | emulate_2op_SrcV(ctxt, "cmp"); | 2212 | fastop(ctxt, em_cmp); |
| 2079 | 2213 | ||
| 2080 | if (ctxt->eflags & EFLG_ZF) { | 2214 | if (ctxt->eflags & EFLG_ZF) { |
| 2081 | /* Success: write back to memory. */ | 2215 | /* Success: write back to memory. */ |
| @@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
| 2843 | ctxt->src.type = OP_IMM; | 2977 | ctxt->src.type = OP_IMM; |
| 2844 | ctxt->src.val = 0; | 2978 | ctxt->src.val = 0; |
| 2845 | ctxt->src.bytes = 1; | 2979 | ctxt->src.bytes = 1; |
| 2846 | emulate_2op_SrcV(ctxt, "or"); | 2980 | fastop(ctxt, em_or); |
| 2847 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); | 2981 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); |
| 2848 | if (cf) | 2982 | if (cf) |
| 2849 | ctxt->eflags |= X86_EFLAGS_CF; | 2983 | ctxt->eflags |= X86_EFLAGS_CF; |
| @@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
| 2852 | return X86EMUL_CONTINUE; | 2986 | return X86EMUL_CONTINUE; |
| 2853 | } | 2987 | } |
| 2854 | 2988 | ||
| 2989 | static int em_aad(struct x86_emulate_ctxt *ctxt) | ||
| 2990 | { | ||
| 2991 | u8 al = ctxt->dst.val & 0xff; | ||
| 2992 | u8 ah = (ctxt->dst.val >> 8) & 0xff; | ||
| 2993 | |||
| 2994 | al = (al + (ah * ctxt->src.val)) & 0xff; | ||
| 2995 | |||
| 2996 | ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; | ||
| 2997 | |||
| 2998 | /* Set PF, ZF, SF */ | ||
| 2999 | ctxt->src.type = OP_IMM; | ||
| 3000 | ctxt->src.val = 0; | ||
| 3001 | ctxt->src.bytes = 1; | ||
| 3002 | fastop(ctxt, em_or); | ||
| 3003 | |||
| 3004 | return X86EMUL_CONTINUE; | ||
| 3005 | } | ||
| 3006 | |||
| 2855 | static int em_call(struct x86_emulate_ctxt *ctxt) | 3007 | static int em_call(struct x86_emulate_ctxt *ctxt) |
| 2856 | { | 3008 | { |
| 2857 | long rel = ctxt->src.val; | 3009 | long rel = ctxt->src.val; |
| @@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
| 2900 | return X86EMUL_CONTINUE; | 3052 | return X86EMUL_CONTINUE; |
| 2901 | } | 3053 | } |
| 2902 | 3054 | ||
| 2903 | static int em_add(struct x86_emulate_ctxt *ctxt) | ||
| 2904 | { | ||
| 2905 | emulate_2op_SrcV(ctxt, "add"); | ||
| 2906 | return X86EMUL_CONTINUE; | ||
| 2907 | } | ||
| 2908 | |||
| 2909 | static int em_or(struct x86_emulate_ctxt *ctxt) | ||
| 2910 | { | ||
| 2911 | emulate_2op_SrcV(ctxt, "or"); | ||
| 2912 | return X86EMUL_CONTINUE; | ||
| 2913 | } | ||
| 2914 | |||
| 2915 | static int em_adc(struct x86_emulate_ctxt *ctxt) | ||
| 2916 | { | ||
| 2917 | emulate_2op_SrcV(ctxt, "adc"); | ||
| 2918 | return X86EMUL_CONTINUE; | ||
| 2919 | } | ||
| 2920 | |||
| 2921 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | ||
| 2922 | { | ||
| 2923 | emulate_2op_SrcV(ctxt, "sbb"); | ||
| 2924 | return X86EMUL_CONTINUE; | ||
| 2925 | } | ||
| 2926 | |||
| 2927 | static int em_and(struct x86_emulate_ctxt *ctxt) | ||
| 2928 | { | ||
| 2929 | emulate_2op_SrcV(ctxt, "and"); | ||
| 2930 | return X86EMUL_CONTINUE; | ||
| 2931 | } | ||
| 2932 | |||
| 2933 | static int em_sub(struct x86_emulate_ctxt *ctxt) | ||
| 2934 | { | ||
| 2935 | emulate_2op_SrcV(ctxt, "sub"); | ||
| 2936 | return X86EMUL_CONTINUE; | ||
| 2937 | } | ||
| 2938 | |||
| 2939 | static int em_xor(struct x86_emulate_ctxt *ctxt) | ||
| 2940 | { | ||
| 2941 | emulate_2op_SrcV(ctxt, "xor"); | ||
| 2942 | return X86EMUL_CONTINUE; | ||
| 2943 | } | ||
| 2944 | |||
| 2945 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | ||
| 2946 | { | ||
| 2947 | emulate_2op_SrcV(ctxt, "cmp"); | ||
| 2948 | /* Disable writeback. */ | ||
| 2949 | ctxt->dst.type = OP_NONE; | ||
| 2950 | return X86EMUL_CONTINUE; | ||
| 2951 | } | ||
| 2952 | |||
| 2953 | static int em_test(struct x86_emulate_ctxt *ctxt) | ||
| 2954 | { | ||
| 2955 | emulate_2op_SrcV(ctxt, "test"); | ||
| 2956 | /* Disable writeback. */ | ||
| 2957 | ctxt->dst.type = OP_NONE; | ||
| 2958 | return X86EMUL_CONTINUE; | ||
| 2959 | } | ||
| 2960 | |||
| 2961 | static int em_xchg(struct x86_emulate_ctxt *ctxt) | 3055 | static int em_xchg(struct x86_emulate_ctxt *ctxt) |
| 2962 | { | 3056 | { |
| 2963 | /* Write back the register source. */ | 3057 | /* Write back the register source. */ |
| @@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) | |||
| 2970 | return X86EMUL_CONTINUE; | 3064 | return X86EMUL_CONTINUE; |
| 2971 | } | 3065 | } |
| 2972 | 3066 | ||
| 2973 | static int em_imul(struct x86_emulate_ctxt *ctxt) | ||
| 2974 | { | ||
| 2975 | emulate_2op_SrcV_nobyte(ctxt, "imul"); | ||
| 2976 | return X86EMUL_CONTINUE; | ||
| 2977 | } | ||
| 2978 | |||
| 2979 | static int em_imul_3op(struct x86_emulate_ctxt *ctxt) | 3067 | static int em_imul_3op(struct x86_emulate_ctxt *ctxt) |
| 2980 | { | 3068 | { |
| 2981 | ctxt->dst.val = ctxt->src2.val; | 3069 | ctxt->dst.val = ctxt->src2.val; |
| 2982 | return em_imul(ctxt); | 3070 | return fastop(ctxt, em_imul); |
| 2983 | } | 3071 | } |
| 2984 | 3072 | ||
| 2985 | static int em_cwd(struct x86_emulate_ctxt *ctxt) | 3073 | static int em_cwd(struct x86_emulate_ctxt *ctxt) |
| @@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) | |||
| 3300 | return X86EMUL_CONTINUE; | 3388 | return X86EMUL_CONTINUE; |
| 3301 | } | 3389 | } |
| 3302 | 3390 | ||
| 3303 | static int em_bt(struct x86_emulate_ctxt *ctxt) | ||
| 3304 | { | ||
| 3305 | /* Disable writeback. */ | ||
| 3306 | ctxt->dst.type = OP_NONE; | ||
| 3307 | /* only subword offset */ | ||
| 3308 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
| 3309 | |||
| 3310 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
| 3311 | return X86EMUL_CONTINUE; | ||
| 3312 | } | ||
| 3313 | |||
| 3314 | static int em_bts(struct x86_emulate_ctxt *ctxt) | ||
| 3315 | { | ||
| 3316 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
| 3317 | return X86EMUL_CONTINUE; | ||
| 3318 | } | ||
| 3319 | |||
| 3320 | static int em_btr(struct x86_emulate_ctxt *ctxt) | ||
| 3321 | { | ||
| 3322 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
| 3323 | return X86EMUL_CONTINUE; | ||
| 3324 | } | ||
| 3325 | |||
| 3326 | static int em_btc(struct x86_emulate_ctxt *ctxt) | ||
| 3327 | { | ||
| 3328 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
| 3329 | return X86EMUL_CONTINUE; | ||
| 3330 | } | ||
| 3331 | |||
| 3332 | static int em_bsf(struct x86_emulate_ctxt *ctxt) | ||
| 3333 | { | ||
| 3334 | emulate_2op_SrcV_nobyte(ctxt, "bsf"); | ||
| 3335 | return X86EMUL_CONTINUE; | ||
| 3336 | } | ||
| 3337 | |||
| 3338 | static int em_bsr(struct x86_emulate_ctxt *ctxt) | ||
| 3339 | { | ||
| 3340 | emulate_2op_SrcV_nobyte(ctxt, "bsr"); | ||
| 3341 | return X86EMUL_CONTINUE; | ||
| 3342 | } | ||
| 3343 | |||
| 3344 | static int em_cpuid(struct x86_emulate_ctxt *ctxt) | 3391 | static int em_cpuid(struct x86_emulate_ctxt *ctxt) |
| 3345 | { | 3392 | { |
| 3346 | u32 eax, ebx, ecx, edx; | 3393 | u32 eax, ebx, ecx, edx; |
| @@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
| 3572 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | 3619 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } |
| 3573 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } | 3620 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } |
| 3574 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } | 3621 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } |
| 3622 | #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } | ||
| 3575 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } | 3623 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } |
| 3624 | #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } | ||
| 3576 | #define II(_f, _e, _i) \ | 3625 | #define II(_f, _e, _i) \ |
| 3577 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } | 3626 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } |
| 3578 | #define IIP(_f, _e, _i, _p) \ | 3627 | #define IIP(_f, _e, _i, _p) \ |
| @@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
| 3583 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3632 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
| 3584 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | 3633 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) |
| 3585 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3634 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
| 3635 | #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e) | ||
| 3586 | #define I2bvIP(_f, _e, _i, _p) \ | 3636 | #define I2bvIP(_f, _e, _i, _p) \ |
| 3587 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) | 3637 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) |
| 3588 | 3638 | ||
| 3589 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ | 3639 | #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
| 3590 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3640 | F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
| 3591 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) | 3641 | F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
| 3592 | 3642 | ||
| 3593 | static const struct opcode group7_rm1[] = { | 3643 | static const struct opcode group7_rm1[] = { |
| 3594 | DI(SrcNone | Priv, monitor), | 3644 | DI(SrcNone | Priv, monitor), |
| @@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = { | |||
| 3614 | }; | 3664 | }; |
| 3615 | 3665 | ||
| 3616 | static const struct opcode group1[] = { | 3666 | static const struct opcode group1[] = { |
| 3617 | I(Lock, em_add), | 3667 | F(Lock, em_add), |
| 3618 | I(Lock | PageTable, em_or), | 3668 | F(Lock | PageTable, em_or), |
| 3619 | I(Lock, em_adc), | 3669 | F(Lock, em_adc), |
| 3620 | I(Lock, em_sbb), | 3670 | F(Lock, em_sbb), |
| 3621 | I(Lock | PageTable, em_and), | 3671 | F(Lock | PageTable, em_and), |
| 3622 | I(Lock, em_sub), | 3672 | F(Lock, em_sub), |
| 3623 | I(Lock, em_xor), | 3673 | F(Lock, em_xor), |
| 3624 | I(0, em_cmp), | 3674 | F(NoWrite, em_cmp), |
| 3625 | }; | 3675 | }; |
| 3626 | 3676 | ||
| 3627 | static const struct opcode group1A[] = { | 3677 | static const struct opcode group1A[] = { |
| 3628 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, | 3678 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
| 3629 | }; | 3679 | }; |
| 3630 | 3680 | ||
| 3681 | static const struct opcode group2[] = { | ||
| 3682 | F(DstMem | ModRM, em_rol), | ||
| 3683 | F(DstMem | ModRM, em_ror), | ||
| 3684 | F(DstMem | ModRM, em_rcl), | ||
| 3685 | F(DstMem | ModRM, em_rcr), | ||
| 3686 | F(DstMem | ModRM, em_shl), | ||
| 3687 | F(DstMem | ModRM, em_shr), | ||
| 3688 | F(DstMem | ModRM, em_shl), | ||
| 3689 | F(DstMem | ModRM, em_sar), | ||
| 3690 | }; | ||
| 3691 | |||
| 3631 | static const struct opcode group3[] = { | 3692 | static const struct opcode group3[] = { |
| 3632 | I(DstMem | SrcImm, em_test), | 3693 | F(DstMem | SrcImm | NoWrite, em_test), |
| 3633 | I(DstMem | SrcImm, em_test), | 3694 | F(DstMem | SrcImm | NoWrite, em_test), |
| 3634 | I(DstMem | SrcNone | Lock, em_not), | 3695 | F(DstMem | SrcNone | Lock, em_not), |
| 3635 | I(DstMem | SrcNone | Lock, em_neg), | 3696 | F(DstMem | SrcNone | Lock, em_neg), |
| 3636 | I(SrcMem, em_mul_ex), | 3697 | I(SrcMem, em_mul_ex), |
| 3637 | I(SrcMem, em_imul_ex), | 3698 | I(SrcMem, em_imul_ex), |
| 3638 | I(SrcMem, em_div_ex), | 3699 | I(SrcMem, em_div_ex), |
| @@ -3640,14 +3701,14 @@ static const struct opcode group3[] = { | |||
| 3640 | }; | 3701 | }; |
| 3641 | 3702 | ||
| 3642 | static const struct opcode group4[] = { | 3703 | static const struct opcode group4[] = { |
| 3643 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), | 3704 | F(ByteOp | DstMem | SrcNone | Lock, em_inc), |
| 3644 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), | 3705 | F(ByteOp | DstMem | SrcNone | Lock, em_dec), |
| 3645 | N, N, N, N, N, N, | 3706 | N, N, N, N, N, N, |
| 3646 | }; | 3707 | }; |
| 3647 | 3708 | ||
| 3648 | static const struct opcode group5[] = { | 3709 | static const struct opcode group5[] = { |
| 3649 | I(DstMem | SrcNone | Lock, em_grp45), | 3710 | F(DstMem | SrcNone | Lock, em_inc), |
| 3650 | I(DstMem | SrcNone | Lock, em_grp45), | 3711 | F(DstMem | SrcNone | Lock, em_dec), |
| 3651 | I(SrcMem | Stack, em_grp45), | 3712 | I(SrcMem | Stack, em_grp45), |
| 3652 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), | 3713 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), |
| 3653 | I(SrcMem | Stack, em_grp45), | 3714 | I(SrcMem | Stack, em_grp45), |
| @@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { { | |||
| 3682 | 3743 | ||
| 3683 | static const struct opcode group8[] = { | 3744 | static const struct opcode group8[] = { |
| 3684 | N, N, N, N, | 3745 | N, N, N, N, |
| 3685 | I(DstMem | SrcImmByte, em_bt), | 3746 | F(DstMem | SrcImmByte | NoWrite, em_bt), |
| 3686 | I(DstMem | SrcImmByte | Lock | PageTable, em_bts), | 3747 | F(DstMem | SrcImmByte | Lock | PageTable, em_bts), |
| 3687 | I(DstMem | SrcImmByte | Lock, em_btr), | 3748 | F(DstMem | SrcImmByte | Lock, em_btr), |
| 3688 | I(DstMem | SrcImmByte | Lock | PageTable, em_btc), | 3749 | F(DstMem | SrcImmByte | Lock | PageTable, em_btc), |
| 3689 | }; | 3750 | }; |
| 3690 | 3751 | ||
| 3691 | static const struct group_dual group9 = { { | 3752 | static const struct group_dual group9 = { { |
| @@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = { | |||
| 3707 | I(0, em_mov), N, N, N, | 3768 | I(0, em_mov), N, N, N, |
| 3708 | }; | 3769 | }; |
| 3709 | 3770 | ||
| 3771 | static const struct escape escape_d9 = { { | ||
| 3772 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), | ||
| 3773 | }, { | ||
| 3774 | /* 0xC0 - 0xC7 */ | ||
| 3775 | N, N, N, N, N, N, N, N, | ||
| 3776 | /* 0xC8 - 0xCF */ | ||
| 3777 | N, N, N, N, N, N, N, N, | ||
| 3778 | /* 0xD0 - 0xC7 */ | ||
| 3779 | N, N, N, N, N, N, N, N, | ||
| 3780 | /* 0xD8 - 0xDF */ | ||
| 3781 | N, N, N, N, N, N, N, N, | ||
| 3782 | /* 0xE0 - 0xE7 */ | ||
| 3783 | N, N, N, N, N, N, N, N, | ||
| 3784 | /* 0xE8 - 0xEF */ | ||
| 3785 | N, N, N, N, N, N, N, N, | ||
| 3786 | /* 0xF0 - 0xF7 */ | ||
| 3787 | N, N, N, N, N, N, N, N, | ||
| 3788 | /* 0xF8 - 0xFF */ | ||
| 3789 | N, N, N, N, N, N, N, N, | ||
| 3790 | } }; | ||
| 3791 | |||
| 3792 | static const struct escape escape_db = { { | ||
| 3793 | N, N, N, N, N, N, N, N, | ||
| 3794 | }, { | ||
| 3795 | /* 0xC0 - 0xC7 */ | ||
| 3796 | N, N, N, N, N, N, N, N, | ||
| 3797 | /* 0xC8 - 0xCF */ | ||
| 3798 | N, N, N, N, N, N, N, N, | ||
| 3799 | /* 0xD0 - 0xD7 */ | ||
| 3800 | N, N, N, N, N, N, N, N, | ||
| 3801 | /* 0xD8 - 0xDF */ | ||
| 3802 | N, N, N, N, N, N, N, N, | ||
| 3803 | /* 0xE0 - 0xE7 */ | ||
| 3804 | N, N, N, I(ImplicitOps, em_fninit), N, N, N, N, | ||
| 3805 | /* 0xE8 - 0xEF */ | ||
| 3806 | N, N, N, N, N, N, N, N, | ||
| 3807 | /* 0xF0 - 0xF7 */ | ||
| 3808 | N, N, N, N, N, N, N, N, | ||
| 3809 | /* 0xF8 - 0xFF */ | ||
| 3810 | N, N, N, N, N, N, N, N, | ||
| 3811 | } }; | ||
| 3812 | |||
| 3813 | static const struct escape escape_dd = { { | ||
| 3814 | N, N, N, N, N, N, N, I(DstMem, em_fnstsw), | ||
| 3815 | }, { | ||
| 3816 | /* 0xC0 - 0xC7 */ | ||
| 3817 | N, N, N, N, N, N, N, N, | ||
| 3818 | /* 0xC8 - 0xCF */ | ||
| 3819 | N, N, N, N, N, N, N, N, | ||
| 3820 | /* 0xD0 - 0xD7 */ | ||
| 3821 | N, N, N, N, N, N, N, N, | ||
| 3822 | /* 0xD8 - 0xDF */ | ||
| 3823 | N, N, N, N, N, N, N, N, | ||
| 3824 | /* 0xE0 - 0xE7 */ | ||
| 3825 | N, N, N, N, N, N, N, N, | ||
| 3826 | /* 0xE8 - 0xEF */ | ||
| 3827 | N, N, N, N, N, N, N, N, | ||
| 3828 | /* 0xF0 - 0xF7 */ | ||
| 3829 | N, N, N, N, N, N, N, N, | ||
| 3830 | /* 0xF8 - 0xFF */ | ||
| 3831 | N, N, N, N, N, N, N, N, | ||
| 3832 | } }; | ||
| 3833 | |||
| 3710 | static const struct opcode opcode_table[256] = { | 3834 | static const struct opcode opcode_table[256] = { |
| 3711 | /* 0x00 - 0x07 */ | 3835 | /* 0x00 - 0x07 */ |
| 3712 | I6ALU(Lock, em_add), | 3836 | F6ALU(Lock, em_add), |
| 3713 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), | 3837 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
| 3714 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | 3838 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), |
| 3715 | /* 0x08 - 0x0F */ | 3839 | /* 0x08 - 0x0F */ |
| 3716 | I6ALU(Lock | PageTable, em_or), | 3840 | F6ALU(Lock | PageTable, em_or), |
| 3717 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), | 3841 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
| 3718 | N, | 3842 | N, |
| 3719 | /* 0x10 - 0x17 */ | 3843 | /* 0x10 - 0x17 */ |
| 3720 | I6ALU(Lock, em_adc), | 3844 | F6ALU(Lock, em_adc), |
| 3721 | I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), | 3845 | I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), |
| 3722 | I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), | 3846 | I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), |
| 3723 | /* 0x18 - 0x1F */ | 3847 | /* 0x18 - 0x1F */ |
| 3724 | I6ALU(Lock, em_sbb), | 3848 | F6ALU(Lock, em_sbb), |
| 3725 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), | 3849 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
| 3726 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | 3850 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), |
| 3727 | /* 0x20 - 0x27 */ | 3851 | /* 0x20 - 0x27 */ |
| 3728 | I6ALU(Lock | PageTable, em_and), N, N, | 3852 | F6ALU(Lock | PageTable, em_and), N, N, |
| 3729 | /* 0x28 - 0x2F */ | 3853 | /* 0x28 - 0x2F */ |
| 3730 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), | 3854 | F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
| 3731 | /* 0x30 - 0x37 */ | 3855 | /* 0x30 - 0x37 */ |
| 3732 | I6ALU(Lock, em_xor), N, N, | 3856 | F6ALU(Lock, em_xor), N, N, |
| 3733 | /* 0x38 - 0x3F */ | 3857 | /* 0x38 - 0x3F */ |
| 3734 | I6ALU(0, em_cmp), N, N, | 3858 | F6ALU(NoWrite, em_cmp), N, N, |
| 3735 | /* 0x40 - 0x4F */ | 3859 | /* 0x40 - 0x4F */ |
| 3736 | X16(D(DstReg)), | 3860 | X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), |
| 3737 | /* 0x50 - 0x57 */ | 3861 | /* 0x50 - 0x57 */ |
| 3738 | X8(I(SrcReg | Stack, em_push)), | 3862 | X8(I(SrcReg | Stack, em_push)), |
| 3739 | /* 0x58 - 0x5F */ | 3863 | /* 0x58 - 0x5F */ |
| @@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = { | |||
| 3757 | G(DstMem | SrcImm, group1), | 3881 | G(DstMem | SrcImm, group1), |
| 3758 | G(ByteOp | DstMem | SrcImm | No64, group1), | 3882 | G(ByteOp | DstMem | SrcImm | No64, group1), |
| 3759 | G(DstMem | SrcImmByte, group1), | 3883 | G(DstMem | SrcImmByte, group1), |
| 3760 | I2bv(DstMem | SrcReg | ModRM, em_test), | 3884 | F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), |
| 3761 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), | 3885 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), |
| 3762 | /* 0x88 - 0x8F */ | 3886 | /* 0x88 - 0x8F */ |
| 3763 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), | 3887 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), |
| @@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = { | |||
| 3777 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3901 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
| 3778 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), | 3902 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
| 3779 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3903 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
| 3780 | I2bv(SrcSI | DstDI | String, em_cmp), | 3904 | F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), |
| 3781 | /* 0xA8 - 0xAF */ | 3905 | /* 0xA8 - 0xAF */ |
| 3782 | I2bv(DstAcc | SrcImm, em_test), | 3906 | F2bv(DstAcc | SrcImm | NoWrite, em_test), |
| 3783 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), | 3907 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), |
| 3784 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), | 3908 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), |
| 3785 | I2bv(SrcAcc | DstDI | String, em_cmp), | 3909 | F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), |
| 3786 | /* 0xB0 - 0xB7 */ | 3910 | /* 0xB0 - 0xB7 */ |
| 3787 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), | 3911 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), |
| 3788 | /* 0xB8 - 0xBF */ | 3912 | /* 0xB8 - 0xBF */ |
| 3789 | X8(I(DstReg | SrcImm | Mov, em_mov)), | 3913 | X8(I(DstReg | SrcImm64 | Mov, em_mov)), |
| 3790 | /* 0xC0 - 0xC7 */ | 3914 | /* 0xC0 - 0xC7 */ |
| 3791 | D2bv(DstMem | SrcImmByte | ModRM), | 3915 | G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), |
| 3792 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), | 3916 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), |
| 3793 | I(ImplicitOps | Stack, em_ret), | 3917 | I(ImplicitOps | Stack, em_ret), |
| 3794 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), | 3918 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), |
| @@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = { | |||
| 3800 | D(ImplicitOps), DI(SrcImmByte, intn), | 3924 | D(ImplicitOps), DI(SrcImmByte, intn), |
| 3801 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), | 3925 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), |
| 3802 | /* 0xD0 - 0xD7 */ | 3926 | /* 0xD0 - 0xD7 */ |
| 3803 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), | 3927 | G(Src2One | ByteOp, group2), G(Src2One, group2), |
| 3804 | N, N, N, N, | 3928 | G(Src2CL | ByteOp, group2), G(Src2CL, group2), |
| 3929 | N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, | ||
| 3805 | /* 0xD8 - 0xDF */ | 3930 | /* 0xD8 - 0xDF */ |
| 3806 | N, N, N, N, N, N, N, N, | 3931 | N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, |
| 3807 | /* 0xE0 - 0xE7 */ | 3932 | /* 0xE0 - 0xE7 */ |
| 3808 | X3(I(SrcImmByte, em_loop)), | 3933 | X3(I(SrcImmByte, em_loop)), |
| 3809 | I(SrcImmByte, em_jcxz), | 3934 | I(SrcImmByte, em_jcxz), |
| @@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = { | |||
| 3870 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3995 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
| 3871 | /* 0xA0 - 0xA7 */ | 3996 | /* 0xA0 - 0xA7 */ |
| 3872 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3997 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
| 3873 | II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), | 3998 | II(ImplicitOps, em_cpuid, cpuid), |
| 3874 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3999 | F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), |
| 3875 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 4000 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), |
| 4001 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, | ||
| 3876 | /* 0xA8 - 0xAF */ | 4002 | /* 0xA8 - 0xAF */ |
| 3877 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 4003 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
| 3878 | DI(ImplicitOps, rsm), | 4004 | DI(ImplicitOps, rsm), |
| 3879 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | 4005 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), |
| 3880 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 4006 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), |
| 3881 | D(DstMem | SrcReg | Src2CL | ModRM), | 4007 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), |
| 3882 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 4008 | D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), |
| 3883 | /* 0xB0 - 0xB7 */ | 4009 | /* 0xB0 - 0xB7 */ |
| 3884 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), | 4010 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), |
| 3885 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), | 4011 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
| 3886 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), | 4012 | F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
| 3887 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 4013 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
| 3888 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 4014 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
| 3889 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 4015 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
| 3890 | /* 0xB8 - 0xBF */ | 4016 | /* 0xB8 - 0xBF */ |
| 3891 | N, N, | 4017 | N, N, |
| 3892 | G(BitOp, group8), | 4018 | G(BitOp, group8), |
| 3893 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 4019 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
| 3894 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | 4020 | F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), |
| 3895 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 4021 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
| 3896 | /* 0xC0 - 0xC7 */ | 4022 | /* 0xC0 - 0xC7 */ |
| 3897 | D2bv(DstMem | SrcReg | ModRM | Lock), | 4023 | D2bv(DstMem | SrcReg | ModRM | Lock), |
| @@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
| 3950 | case 4: | 4076 | case 4: |
| 3951 | op->val = insn_fetch(s32, ctxt); | 4077 | op->val = insn_fetch(s32, ctxt); |
| 3952 | break; | 4078 | break; |
| 4079 | case 8: | ||
| 4080 | op->val = insn_fetch(s64, ctxt); | ||
| 4081 | break; | ||
| 3953 | } | 4082 | } |
| 3954 | if (!sign_extension) { | 4083 | if (!sign_extension) { |
| 3955 | switch (op->bytes) { | 4084 | switch (op->bytes) { |
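The new case 8 exists for one consumer: the OpImm64 operand type this patch adds for opcodes 0xB8-0xBF. In 64-bit mode with a REX.W prefix, MOV r64, imm64 (e.g. 48 b8 88 77 66 55 44 33 22 11 = movabs $0x1122334455667788, %rax) carries a full 8-byte immediate, which the old OpImm path would have reduced to a sign-extended 32-bit value. The decode itself is the one line in the next hunk; as a sketch (helper name made up), it is equivalent to:

    /* Illustration only: what the OpImm64 case in the next hunk boils down to. */
    static int decode_imm64(struct x86_emulate_ctxt *ctxt, struct operand *op)
    {
    	/* op_bytes is 2, 4 or (with REX.W) 8, and all of it is fetched. */
    	return decode_imm(ctxt, op, ctxt->op_bytes, true);
    }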
| @@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
| 4028 | case OpImm: | 4157 | case OpImm: |
| 4029 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); | 4158 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); |
| 4030 | break; | 4159 | break; |
| 4160 | case OpImm64: | ||
| 4161 | rc = decode_imm(ctxt, op, ctxt->op_bytes, true); | ||
| 4162 | break; | ||
| 4031 | case OpMem8: | 4163 | case OpMem8: |
| 4032 | ctxt->memop.bytes = 1; | 4164 | ctxt->memop.bytes = 1; |
| 4033 | goto mem_common; | 4165 | goto mem_common; |
| @@ -4222,6 +4354,12 @@ done_prefixes: | |||
| 4222 | case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; | 4354 | case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; |
| 4223 | } | 4355 | } |
| 4224 | break; | 4356 | break; |
| 4357 | case Escape: | ||
| 4358 | if (ctxt->modrm > 0xbf) | ||
| 4359 | opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; | ||
| 4360 | else | ||
| 4361 | opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; | ||
| 4362 | break; | ||
| 4225 | default: | 4363 | default: |
| 4226 | return EMULATION_FAILED; | 4364 | return EMULATION_FAILED; |
| 4227 | } | 4365 | } |
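For the Escape case just added: an x87 escape opcode (0xD8-0xDF) with a memory-form ModRM (mod != 3) is decoded by its reg field into esc->op[], while register forms (ModRM >= 0xC0) index esc->high[] directly. A standalone sketch of the same lookup (the helper name is invented; the real logic is the inline switch above):

    static struct opcode escape_lookup(const struct escape *esc, u8 modrm)
    {
    	if (modrm > 0xbf)
    		/* register form, e.g. DB E3 (fninit) -> high[0x23] */
    		return esc->high[modrm - 0xc0];
    	/* memory form, e.g. D9 /7 (fnstcw m16) -> op[7] */
    	return esc->op[(modrm >> 3) & 7];
    }

With the escape tables defined earlier in this patch, escape_db.high[0x23] resolves to I(ImplicitOps, em_fninit) and escape_d9.op[7] / escape_dd.op[7] to em_fnstcw / em_fnstsw — the three x87 instructions the emulator now handles.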
| @@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, | |||
| 4354 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); | 4492 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); |
| 4355 | } | 4493 | } |
| 4356 | 4494 | ||
| 4495 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) | ||
| 4496 | { | ||
| 4497 | ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
| 4498 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; | ||
| 4499 | asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" | ||
| 4500 | : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) | ||
| 4501 | : "c"(ctxt->src2.val), [fastop]"S"(fop)); | ||
| 4502 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); | ||
| 4503 | return X86EMUL_CONTINUE; | ||
| 4504 | } | ||
| 4357 | 4505 | ||
| 4358 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 4506 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
| 4359 | { | 4507 | { |
| @@ -4483,6 +4631,13 @@ special_insn: | |||
| 4483 | } | 4631 | } |
| 4484 | 4632 | ||
| 4485 | if (ctxt->execute) { | 4633 | if (ctxt->execute) { |
| 4634 | if (ctxt->d & Fastop) { | ||
| 4635 | void (*fop)(struct fastop *) = (void *)ctxt->execute; | ||
| 4636 | rc = fastop(ctxt, fop); | ||
| 4637 | if (rc != X86EMUL_CONTINUE) | ||
| 4638 | goto done; | ||
| 4639 | goto writeback; | ||
| 4640 | } | ||
| 4486 | rc = ctxt->execute(ctxt); | 4641 | rc = ctxt->execute(ctxt); |
| 4487 | if (rc != X86EMUL_CONTINUE) | 4642 | if (rc != X86EMUL_CONTINUE) |
| 4488 | goto done; | 4643 | goto done; |
| @@ -4493,12 +4648,6 @@ special_insn: | |||
| 4493 | goto twobyte_insn; | 4648 | goto twobyte_insn; |
| 4494 | 4649 | ||
| 4495 | switch (ctxt->b) { | 4650 | switch (ctxt->b) { |
| 4496 | case 0x40 ... 0x47: /* inc r16/r32 */ | ||
| 4497 | emulate_1op(ctxt, "inc"); | ||
| 4498 | break; | ||
| 4499 | case 0x48 ... 0x4f: /* dec r16/r32 */ | ||
| 4500 | emulate_1op(ctxt, "dec"); | ||
| 4501 | break; | ||
| 4502 | case 0x63: /* movsxd */ | 4651 | case 0x63: /* movsxd */ |
| 4503 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 4652 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
| 4504 | goto cannot_emulate; | 4653 | goto cannot_emulate; |
| @@ -4523,9 +4672,6 @@ special_insn: | |||
| 4523 | case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; | 4672 | case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; |
| 4524 | } | 4673 | } |
| 4525 | break; | 4674 | break; |
| 4526 | case 0xc0 ... 0xc1: | ||
| 4527 | rc = em_grp2(ctxt); | ||
| 4528 | break; | ||
| 4529 | case 0xcc: /* int3 */ | 4675 | case 0xcc: /* int3 */ |
| 4530 | rc = emulate_int(ctxt, 3); | 4676 | rc = emulate_int(ctxt, 3); |
| 4531 | break; | 4677 | break; |
| @@ -4536,13 +4682,6 @@ special_insn: | |||
| 4536 | if (ctxt->eflags & EFLG_OF) | 4682 | if (ctxt->eflags & EFLG_OF) |
| 4537 | rc = emulate_int(ctxt, 4); | 4683 | rc = emulate_int(ctxt, 4); |
| 4538 | break; | 4684 | break; |
| 4539 | case 0xd0 ... 0xd1: /* Grp2 */ | ||
| 4540 | rc = em_grp2(ctxt); | ||
| 4541 | break; | ||
| 4542 | case 0xd2 ... 0xd3: /* Grp2 */ | ||
| 4543 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); | ||
| 4544 | rc = em_grp2(ctxt); | ||
| 4545 | break; | ||
| 4546 | case 0xe9: /* jmp rel */ | 4685 | case 0xe9: /* jmp rel */ |
| 4547 | case 0xeb: /* jmp rel short */ | 4686 | case 0xeb: /* jmp rel short */ |
| 4548 | jmp_rel(ctxt, ctxt->src.val); | 4687 | jmp_rel(ctxt, ctxt->src.val); |
| @@ -4661,14 +4800,6 @@ twobyte_insn: | |||
| 4661 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4800 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
| 4662 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4801 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
| 4663 | break; | 4802 | break; |
| 4664 | case 0xa4: /* shld imm8, r, r/m */ | ||
| 4665 | case 0xa5: /* shld cl, r, r/m */ | ||
| 4666 | emulate_2op_cl(ctxt, "shld"); | ||
| 4667 | break; | ||
| 4668 | case 0xac: /* shrd imm8, r, r/m */ | ||
| 4669 | case 0xad: /* shrd cl, r, r/m */ | ||
| 4670 | emulate_2op_cl(ctxt, "shrd"); | ||
| 4671 | break; | ||
| 4672 | case 0xae: /* clflush */ | 4803 | case 0xae: /* clflush */ |
| 4673 | break; | 4804 | break; |
| 4674 | case 0xb6 ... 0xb7: /* movzx */ | 4805 | case 0xb6 ... 0xb7: /* movzx */ |
| @@ -4682,7 +4813,7 @@ twobyte_insn: | |||
| 4682 | (s16) ctxt->src.val; | 4813 | (s16) ctxt->src.val; |
| 4683 | break; | 4814 | break; |
| 4684 | case 0xc0 ... 0xc1: /* xadd */ | 4815 | case 0xc0 ... 0xc1: /* xadd */ |
| 4685 | emulate_2op_SrcV(ctxt, "add"); | 4816 | fastop(ctxt, em_add); |
| 4686 | /* Write back the register source. */ | 4817 | /* Write back the register source. */ |
| 4687 | ctxt->src.val = ctxt->dst.orig_val; | 4818 | ctxt->src.val = ctxt->dst.orig_val; |
| 4688 | write_register_operand(&ctxt->src); | 4819 | write_register_operand(&ctxt->src); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 11300d2fa714..c1d30b2fc9bb 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
| @@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
| 122 | */ | 122 | */ |
| 123 | remaining = hrtimer_get_remaining(&ps->timer); | 123 | remaining = hrtimer_get_remaining(&ps->timer); |
| 124 | elapsed = ps->period - ktime_to_ns(remaining); | 124 | elapsed = ps->period - ktime_to_ns(remaining); |
| 125 | elapsed = mod_64(elapsed, ps->period); | ||
| 126 | 125 | ||
| 127 | return elapsed; | 126 | return elapsed; |
| 128 | } | 127 | } |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 848206df0967..cc31f7c06d3d 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
| @@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 241 | int irq, irq2, intno; | 241 | int irq, irq2, intno; |
| 242 | struct kvm_pic *s = pic_irqchip(kvm); | 242 | struct kvm_pic *s = pic_irqchip(kvm); |
| 243 | 243 | ||
| 244 | s->output = 0; | ||
| 245 | |||
| 244 | pic_lock(s); | 246 | pic_lock(s); |
| 245 | irq = pic_get_irq(&s->pics[0]); | 247 | irq = pic_get_irq(&s->pics[0]); |
| 246 | if (irq >= 0) { | 248 | if (irq >= 0) { |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7e06ba1618bd..484bc874688b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
| @@ -38,49 +38,81 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | 38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); |
| 39 | 39 | ||
| 40 | /* | 40 | /* |
| 41 | * check if there is pending interrupt from | ||
| 42 | * non-APIC source without intack. | ||
| 43 | */ | ||
| 44 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) | ||
| 45 | { | ||
| 46 | if (kvm_apic_accept_pic_intr(v)) | ||
| 47 | return pic_irqchip(v->kvm)->output; /* PIC */ | ||
| 48 | else | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | /* | ||
| 53 | * check if there is an injectable interrupt: | ||
| 54 | * when virtual interrupt delivery is enabled, | ||
| 55 | * interrupts from the APIC will be handled by hardware, | ||
| 56 | * so we don't need to check them here. | ||
| 57 | */ | ||
| 58 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | ||
| 59 | { | ||
| 60 | if (!irqchip_in_kernel(v->kvm)) | ||
| 61 | return v->arch.interrupt.pending; | ||
| 62 | |||
| 63 | if (kvm_cpu_has_extint(v)) | ||
| 64 | return 1; | ||
| 65 | |||
| 66 | if (kvm_apic_vid_enabled(v->kvm)) | ||
| 67 | return 0; | ||
| 68 | |||
| 69 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 41 | * check if there is pending interrupt without | 73 | * check if there is pending interrupt without |
| 42 | * intack. | 74 | * intack. |
| 43 | */ | 75 | */ |
| 44 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | 76 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) |
| 45 | { | 77 | { |
| 46 | struct kvm_pic *s; | ||
| 47 | |||
| 48 | if (!irqchip_in_kernel(v->kvm)) | 78 | if (!irqchip_in_kernel(v->kvm)) |
| 49 | return v->arch.interrupt.pending; | 79 | return v->arch.interrupt.pending; |
| 50 | 80 | ||
| 51 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ | 81 | if (kvm_cpu_has_extint(v)) |
| 52 | if (kvm_apic_accept_pic_intr(v)) { | 82 | return 1; |
| 53 | s = pic_irqchip(v->kvm); /* PIC */ | 83 | |
| 54 | return s->output; | 84 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ |
| 55 | } else | ||
| 56 | return 0; | ||
| 57 | } | ||
| 58 | return 1; | ||
| 59 | } | 85 | } |
| 60 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); | 86 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); |
| 61 | 87 | ||
| 62 | /* | 88 | /* |
| 89 | * Read pending interrupt (from non-APIC source) | ||
| 90 | * vector and intack. | ||
| 91 | */ | ||
| 92 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) | ||
| 93 | { | ||
| 94 | if (kvm_cpu_has_extint(v)) | ||
| 95 | return kvm_pic_read_irq(v->kvm); /* PIC */ | ||
| 96 | return -1; | ||
| 97 | } | ||
| 98 | |||
| 99 | /* | ||
| 63 | * Read pending interrupt vector and intack. | 100 | * Read pending interrupt vector and intack. |
| 64 | */ | 101 | */ |
| 65 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | 102 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) |
| 66 | { | 103 | { |
| 67 | struct kvm_pic *s; | ||
| 68 | int vector; | 104 | int vector; |
| 69 | 105 | ||
| 70 | if (!irqchip_in_kernel(v->kvm)) | 106 | if (!irqchip_in_kernel(v->kvm)) |
| 71 | return v->arch.interrupt.nr; | 107 | return v->arch.interrupt.nr; |
| 72 | 108 | ||
| 73 | vector = kvm_get_apic_interrupt(v); /* APIC */ | 109 | vector = kvm_cpu_get_extint(v); |
| 74 | if (vector == -1) { | 110 | |
| 75 | if (kvm_apic_accept_pic_intr(v)) { | 111 | if (kvm_apic_vid_enabled(v->kvm) || vector != -1) |
| 76 | s = pic_irqchip(v->kvm); | 112 | return vector; /* PIC */ |
| 77 | s->output = 0; /* PIC */ | 113 | |
| 78 | vector = kvm_pic_read_irq(v->kvm); | 114 | return kvm_get_apic_interrupt(v); /* APIC */ |
| 79 | } | ||
| 80 | } | ||
| 81 | return vector; | ||
| 82 | } | 115 | } |
| 83 | EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | ||
| 84 | 116 | ||
| 85 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 117 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
| 86 | { | 118 | { |
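A hypothetical usage sketch (not from the patch) of the two predicates that now differ: with virtual interrupt delivery enabled, kvm_cpu_has_interrupt() may still report a pending LAPIC interrupt, but only the PIC ExtINT case reported by kvm_cpu_has_injectable_intr() needs software injection, because the hardware delivers APIC interrupts itself.

    static void report_pending(struct kvm_vcpu *v)
    {
    	/* Illustration only: the two answers diverge when vid is enabled. */
    	pr_debug("kvm: pending=%d injectable=%d\n",
    		 kvm_cpu_has_interrupt(v),
    		 kvm_cpu_has_injectable_intr(v));
    }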
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9392f527f107..02b51dd4e4ad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
| @@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic) | |||
| 140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ | 140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ |
| 141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) | 141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) |
| 142 | 142 | ||
| 143 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
| 144 | { | ||
| 145 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
| 146 | } | ||
| 147 | |||
| 148 | static inline int kvm_apic_id(struct kvm_lapic *apic) | 143 | static inline int kvm_apic_id(struct kvm_lapic *apic) |
| 149 | { | 144 | { |
| 150 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 145 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
| 151 | } | 146 | } |
| 152 | 147 | ||
| 153 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | 148 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, |
| 149 | struct kvm_lapic_irq *irq, | ||
| 150 | u64 *eoi_exit_bitmap) | ||
| 154 | { | 151 | { |
| 155 | u16 cid; | 152 | struct kvm_lapic **dst; |
| 156 | ldr >>= 32 - map->ldr_bits; | 153 | struct kvm_apic_map *map; |
| 157 | cid = (ldr >> map->cid_shift) & map->cid_mask; | 154 | unsigned long bitmap = 1; |
| 155 | int i; | ||
| 158 | 156 | ||
| 159 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | 157 | rcu_read_lock(); |
| 158 | map = rcu_dereference(vcpu->kvm->arch.apic_map); | ||
| 160 | 159 | ||
| 161 | return cid; | 160 | if (unlikely(!map)) { |
| 162 | } | 161 | __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); |
| 162 | goto out; | ||
| 163 | } | ||
| 163 | 164 | ||
| 164 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | 165 | if (irq->dest_mode == 0) { /* physical mode */ |
| 165 | { | 166 | if (irq->delivery_mode == APIC_DM_LOWEST || |
| 166 | ldr >>= (32 - map->ldr_bits); | 167 | irq->dest_id == 0xff) { |
| 167 | return ldr & map->lid_mask; | 168 | __set_bit(irq->vector, |
| 169 | (unsigned long *)eoi_exit_bitmap); | ||
| 170 | goto out; | ||
| 171 | } | ||
| 172 | dst = &map->phys_map[irq->dest_id & 0xff]; | ||
| 173 | } else { | ||
| 174 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | ||
| 175 | |||
| 176 | dst = map->logical_map[apic_cluster_id(map, mda)]; | ||
| 177 | |||
| 178 | bitmap = apic_logical_id(map, mda); | ||
| 179 | } | ||
| 180 | |||
| 181 | for_each_set_bit(i, &bitmap, 16) { | ||
| 182 | if (!dst[i]) | ||
| 183 | continue; | ||
| 184 | if (dst[i]->vcpu == vcpu) { | ||
| 185 | __set_bit(irq->vector, | ||
| 186 | (unsigned long *)eoi_exit_bitmap); | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | out: | ||
| 192 | rcu_read_unlock(); | ||
| 168 | } | 193 | } |
| 169 | 194 | ||
| 170 | static void recalculate_apic_map(struct kvm *kvm) | 195 | static void recalculate_apic_map(struct kvm *kvm) |
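The kvm_calculate_eoi_exitmap() addition above decodes the interrupt destination through the cached APIC map and, if this vcpu is a target, sets the vector's bit in a 256-bit EOI exit bitmap. The sketch below models only the bit-setting step on a plain uint64_t[4] array; the helper name and array size are assumptions for illustration, not kernel interfaces.

    /* Minimal model of marking a vector in a 256-bit EOI exit bitmap
     * (the real code uses __set_bit on a u64 array). */
    #include <stdint.h>
    #include <stdio.h>

    #define EOI_BITMAP_U64S 4   /* 256 vectors / 64 bits per word */

    static void eoi_exitmap_set(uint64_t *bitmap, unsigned vector)
    {
        bitmap[vector / 64] |= 1ull << (vector % 64);
    }

    int main(void)
    {
        uint64_t eoi_exit_bitmap[EOI_BITMAP_U64S] = { 0 };

        /* A level-triggered ioapic vector must cause an EOI exit. */
        eoi_exitmap_set(eoi_exit_bitmap, 0x22);

        printf("word0=%#llx\n", (unsigned long long)eoi_exit_bitmap[0]);
        return 0;
    }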
| @@ -230,6 +255,8 @@ out: | |||
| 230 | 255 | ||
| 231 | if (old) | 256 | if (old) |
| 232 | kfree_rcu(old, rcu); | 257 | kfree_rcu(old, rcu); |
| 258 | |||
| 259 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
| 233 | } | 260 | } |
| 234 | 261 | ||
| 235 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | 262 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) |
| @@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
| 345 | { | 372 | { |
| 346 | int result; | 373 | int result; |
| 347 | 374 | ||
| 375 | /* | ||
| 376 | * Note that irr_pending is just a hint. It will always be | ||
| 377 | * true with virtual interrupt delivery enabled. | ||
| 378 | */ | ||
| 348 | if (!apic->irr_pending) | 379 | if (!apic->irr_pending) |
| 349 | return -1; | 380 | return -1; |
| 350 | 381 | ||
| @@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
| 461 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 492 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
| 462 | { | 493 | { |
| 463 | int result; | 494 | int result; |
| 495 | |||
| 496 | /* Note that isr_count is always 1 with vid enabled */ | ||
| 464 | if (!apic->isr_count) | 497 | if (!apic->isr_count) |
| 465 | return -1; | 498 | return -1; |
| 466 | if (likely(apic->highest_isr_cache != -1)) | 499 | if (likely(apic->highest_isr_cache != -1)) |
| @@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
| 740 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 773 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
| 741 | } | 774 | } |
| 742 | 775 | ||
| 776 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | ||
| 777 | { | ||
| 778 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | ||
| 779 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | ||
| 780 | int trigger_mode; | ||
| 781 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
| 782 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
| 783 | else | ||
| 784 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
| 785 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
| 786 | } | ||
| 787 | } | ||
| 788 | |||
| 743 | static int apic_set_eoi(struct kvm_lapic *apic) | 789 | static int apic_set_eoi(struct kvm_lapic *apic) |
| 744 | { | 790 | { |
| 745 | int vector = apic_find_highest_isr(apic); | 791 | int vector = apic_find_highest_isr(apic); |
| @@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic) | |||
| 756 | apic_clear_isr(vector, apic); | 802 | apic_clear_isr(vector, apic); |
| 757 | apic_update_ppr(apic); | 803 | apic_update_ppr(apic); |
| 758 | 804 | ||
| 759 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | 805 | kvm_ioapic_send_eoi(apic, vector); |
| 760 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | ||
| 761 | int trigger_mode; | ||
| 762 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
| 763 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
| 764 | else | ||
| 765 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
| 766 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
| 767 | } | ||
| 768 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 806 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
| 769 | return vector; | 807 | return vector; |
| 770 | } | 808 | } |
| 771 | 809 | ||
| 810 | /* | ||
| 811 | * This interface assumes a trap-like exit, which has already finished | ||
| 812 | * the desired side effects, including the vISR and vPPR updates. | ||
| 813 | */ | ||
| 814 | void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) | ||
| 815 | { | ||
| 816 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 817 | |||
| 818 | trace_kvm_eoi(apic, vector); | ||
| 819 | |||
| 820 | kvm_ioapic_send_eoi(apic, vector); | ||
| 821 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | ||
| 822 | } | ||
| 823 | EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); | ||
| 824 | |||
| 772 | static void apic_send_ipi(struct kvm_lapic *apic) | 825 | static void apic_send_ipi(struct kvm_lapic *apic) |
| 773 | { | 826 | { |
| 774 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); | 827 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); |
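The hunk above splits the ioapic notification out of apic_set_eoi() so that kvm_apic_set_eoi_accelerated() can reuse it for trap-like exits, where hardware has already updated vISR/vPPR. Below is a reduced standalone model of the two paths, with made-up names and the ISR collapsed to a single vector; it is a sketch of the split, not kernel code.

    #include <stdbool.h>
    #include <stdio.h>

    struct lapic_model {
        int  isr_vec;                /* highest in-service vector, -1 if none */
        bool ioapic_notified;
    };

    static void ioapic_send_eoi(struct lapic_model *a, int vec)
    {
        (void)vec;
        a->ioapic_notified = true;   /* stands in for kvm_ioapic_update_eoi() */
    }

    /* Legacy path: software finds and clears the in-service vector itself. */
    static int set_eoi(struct lapic_model *a)
    {
        int vec = a->isr_vec;
        if (vec == -1)
            return -1;
        a->isr_vec = -1;             /* apic_clear_isr() + PPR update */
        ioapic_send_eoi(a, vec);
        return vec;
    }

    /* Trap-like path: hardware already updated vISR/vPPR, so only the
     * ioapic side effect is left for software. */
    static void set_eoi_accelerated(struct lapic_model *a, int vec)
    {
        ioapic_send_eoi(a, vec);
    }

    int main(void)
    {
        struct lapic_model a = { .isr_vec = 0x31, .ioapic_notified = false };

        printf("legacy eoi: vector %#x\n", (unsigned)set_eoi(&a));
        set_eoi_accelerated(&a, 0x31);
        printf("ioapic notified: %d\n", a.ioapic_notified);
        return 0;
    }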
| @@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | |||
| 1212 | } | 1265 | } |
| 1213 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | 1266 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); |
| 1214 | 1267 | ||
| 1268 | /* emulate APIC access in a trap-like manner */ | ||
| 1269 | void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) | ||
| 1270 | { | ||
| 1271 | u32 val = 0; | ||
| 1272 | |||
| 1273 | /* hw has done the conditional check and inst decode */ | ||
| 1274 | offset &= 0xff0; | ||
| 1275 | |||
| 1276 | apic_reg_read(vcpu->arch.apic, offset, 4, &val); | ||
| 1277 | |||
| 1278 | /* TODO: optimize to just emulate side effect w/o one more write */ | ||
| 1279 | apic_reg_write(vcpu->arch.apic, offset, val); | ||
| 1280 | } | ||
| 1281 | EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); | ||
| 1282 | |||
| 1215 | void kvm_free_lapic(struct kvm_vcpu *vcpu) | 1283 | void kvm_free_lapic(struct kvm_vcpu *vcpu) |
| 1216 | { | 1284 | { |
| 1217 | struct kvm_lapic *apic = vcpu->arch.apic; | 1285 | struct kvm_lapic *apic = vcpu->arch.apic; |
| @@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | |||
| 1288 | 1356 | ||
| 1289 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | 1357 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) |
| 1290 | { | 1358 | { |
| 1359 | u64 old_value = vcpu->arch.apic_base; | ||
| 1291 | struct kvm_lapic *apic = vcpu->arch.apic; | 1360 | struct kvm_lapic *apic = vcpu->arch.apic; |
| 1292 | 1361 | ||
| 1293 | if (!apic) { | 1362 | if (!apic) { |
| @@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
| 1309 | value &= ~MSR_IA32_APICBASE_BSP; | 1378 | value &= ~MSR_IA32_APICBASE_BSP; |
| 1310 | 1379 | ||
| 1311 | vcpu->arch.apic_base = value; | 1380 | vcpu->arch.apic_base = value; |
| 1312 | if (apic_x2apic_mode(apic)) { | 1381 | if ((old_value ^ value) & X2APIC_ENABLE) { |
| 1313 | u32 id = kvm_apic_id(apic); | 1382 | if (value & X2APIC_ENABLE) { |
| 1314 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); | 1383 | u32 id = kvm_apic_id(apic); |
| 1315 | kvm_apic_set_ldr(apic, ldr); | 1384 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); |
| 1385 | kvm_apic_set_ldr(apic, ldr); | ||
| 1386 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); | ||
| 1387 | } else | ||
| 1388 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); | ||
| 1316 | } | 1389 | } |
| 1390 | |||
| 1317 | apic->base_address = apic->vcpu->arch.apic_base & | 1391 | apic->base_address = apic->vcpu->arch.apic_base & |
| 1318 | MSR_IA32_APICBASE_BASE; | 1392 | MSR_IA32_APICBASE_BASE; |
| 1319 | 1393 | ||
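The x2APIC branch above derives the logical destination register from the APIC ID as ((id >> 4) << 16) | (1 << (id & 0xf)): cluster number in bits 31:16 and a one-hot member bit in bits 15:0. A standalone check of that formula:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same derivation as the patch: cluster in the high half,
     * one-hot member bit in the low half. */
    static uint32_t x2apic_ldr(uint32_t id)
    {
        return ((id >> 4) << 16) | (1u << (id & 0xf));
    }

    int main(void)
    {
        assert(x2apic_ldr(0)  == 0x00000001u);    /* cluster 0, member 0 */
        assert(x2apic_ldr(17) == 0x00010002u);    /* cluster 1, member 1 */
        printf("ldr(35)=%#x\n", x2apic_ldr(35));  /* cluster 2, member 3 */
        return 0;
    }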
| @@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
| 1359 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | 1433 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); |
| 1360 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1434 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
| 1361 | } | 1435 | } |
| 1362 | apic->irr_pending = false; | 1436 | apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); |
| 1363 | apic->isr_count = 0; | 1437 | apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); |
| 1364 | apic->highest_isr_cache = -1; | 1438 | apic->highest_isr_cache = -1; |
| 1365 | update_divide_count(apic); | 1439 | update_divide_count(apic); |
| 1366 | atomic_set(&apic->lapic_timer.pending, 0); | 1440 | atomic_set(&apic->lapic_timer.pending, 0); |
| @@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
| 1575 | update_divide_count(apic); | 1649 | update_divide_count(apic); |
| 1576 | start_apic_timer(apic); | 1650 | start_apic_timer(apic); |
| 1577 | apic->irr_pending = true; | 1651 | apic->irr_pending = true; |
| 1578 | apic->isr_count = count_vectors(apic->regs + APIC_ISR); | 1652 | apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? |
| 1653 | 1 : count_vectors(apic->regs + APIC_ISR); | ||
| 1579 | apic->highest_isr_cache = -1; | 1654 | apic->highest_isr_cache = -1; |
| 1655 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); | ||
| 1580 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1656 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 1581 | } | 1657 | } |
| 1582 | 1658 | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e5ebf9f3571f..1676d34ddb4e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | |||
| 64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | 64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); |
| 65 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); | 65 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); |
| 66 | 66 | ||
| 67 | void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); | ||
| 68 | void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); | ||
| 69 | |||
| 67 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 70 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
| 68 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); | 71 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); |
| 69 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | 72 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); |
| @@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | |||
| 124 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); | 127 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); |
| 125 | } | 128 | } |
| 126 | 129 | ||
| 130 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
| 131 | { | ||
| 132 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
| 133 | } | ||
| 134 | |||
| 135 | static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | ||
| 136 | { | ||
| 137 | return kvm_x86_ops->vm_has_apicv(kvm); | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | ||
| 141 | { | ||
| 142 | u16 cid; | ||
| 143 | ldr >>= 32 - map->ldr_bits; | ||
| 144 | cid = (ldr >> map->cid_shift) & map->cid_mask; | ||
| 145 | |||
| 146 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | ||
| 147 | |||
| 148 | return cid; | ||
| 149 | } | ||
| 150 | |||
| 151 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | ||
| 152 | { | ||
| 153 | ldr >>= (32 - map->ldr_bits); | ||
| 154 | return ldr & map->lid_mask; | ||
| 155 | } | ||
| 156 | |||
| 157 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
| 158 | struct kvm_lapic_irq *irq, | ||
| 159 | u64 *eoi_bitmap); | ||
| 160 | |||
| 127 | #endif | 161 | #endif |
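apic_cluster_id() and apic_logical_id() above decode an LDR using the parameters cached in the APIC map. The standalone model below plugs in xAPIC cluster-mode style values (8 LDR bits, 4-bit cluster and member fields); these concrete numbers are assumptions for illustration, while the decode steps mirror the helpers.

    #include <stdint.h>
    #include <stdio.h>

    struct apic_map_model {
        uint8_t ldr_bits;
        uint8_t cid_shift;
        uint8_t cid_mask;
        uint8_t lid_mask;
    };

    static unsigned cluster_id(const struct apic_map_model *m, uint32_t ldr)
    {
        ldr >>= 32 - m->ldr_bits;                 /* keep the used LDR bits */
        return (ldr >> m->cid_shift) & m->cid_mask;
    }

    static unsigned logical_id(const struct apic_map_model *m, uint32_t ldr)
    {
        ldr >>= 32 - m->ldr_bits;
        return ldr & m->lid_mask;                 /* member bitmap */
    }

    int main(void)
    {
        struct apic_map_model m = { .ldr_bits = 8, .cid_shift = 4,
                                    .cid_mask = 0xf, .lid_mask = 0xf };
        uint32_t ldr = 0x23u << 24;               /* cluster 2, member bit 0x3 */

        printf("cid=%u lid=%#x\n", cluster_id(&m, ldr), logical_id(&m, ldr));
        return 0;
    }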
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01d7c2ad05f5..956ca358108a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte) | |||
| 448 | 448 | ||
| 449 | static bool spte_is_locklessly_modifiable(u64 spte) | 449 | static bool spte_is_locklessly_modifiable(u64 spte) |
| 450 | { | 450 | { |
| 451 | return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); | 451 | return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == |
| 452 | (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); | ||
| 452 | } | 453 | } |
| 453 | 454 | ||
| 454 | static bool spte_has_volatile_bits(u64 spte) | 455 | static bool spte_has_volatile_bits(u64 spte) |
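The spte_is_locklessly_modifiable() change above replaces !(~spte & mask) with the more readable (spte & mask) == mask; both forms require every bit in the mask to be set. A quick standalone equivalence check (the bit positions used here are made up, not the real SPTE layout):

    #include <assert.h>
    #include <stdint.h>

    #define SPTE_HOST_WRITEABLE (1ull << 60)      /* illustrative positions */
    #define SPTE_MMU_WRITEABLE  (1ull << 61)
    #define BOTH (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)

    static int old_form(uint64_t spte) { return !(~spte & BOTH); }
    static int new_form(uint64_t spte) { return (spte & BOTH) == BOTH; }

    int main(void)
    {
        uint64_t cases[] = { 0, SPTE_HOST_WRITEABLE, SPTE_MMU_WRITEABLE,
                             BOTH, BOTH | 0x7 };

        for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
            assert(old_form(cases[i]) == new_form(cases[i]));
        return 0;
    }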
| @@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
| 831 | if (host_level == PT_PAGE_TABLE_LEVEL) | 832 | if (host_level == PT_PAGE_TABLE_LEVEL) |
| 832 | return host_level; | 833 | return host_level; |
| 833 | 834 | ||
| 834 | max_level = kvm_x86_ops->get_lpage_level() < host_level ? | 835 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); |
| 835 | kvm_x86_ops->get_lpage_level() : host_level; | ||
| 836 | 836 | ||
| 837 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | 837 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) |
| 838 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) | 838 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) |
| @@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) | |||
| 1142 | } | 1142 | } |
| 1143 | 1143 | ||
| 1144 | static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | 1144 | static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, |
| 1145 | int level, bool pt_protect) | 1145 | bool pt_protect) |
| 1146 | { | 1146 | { |
| 1147 | u64 *sptep; | 1147 | u64 *sptep; |
| 1148 | struct rmap_iterator iter; | 1148 | struct rmap_iterator iter; |
| @@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
| 1180 | while (mask) { | 1180 | while (mask) { |
| 1181 | rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), | 1181 | rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), |
| 1182 | PT_PAGE_TABLE_LEVEL, slot); | 1182 | PT_PAGE_TABLE_LEVEL, slot); |
| 1183 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); | 1183 | __rmap_write_protect(kvm, rmapp, false); |
| 1184 | 1184 | ||
| 1185 | /* clear the first set bit */ | 1185 | /* clear the first set bit */ |
| 1186 | mask &= mask - 1; | 1186 | mask &= mask - 1; |
| @@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 1199 | for (i = PT_PAGE_TABLE_LEVEL; | 1199 | for (i = PT_PAGE_TABLE_LEVEL; |
| 1200 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1200 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 1201 | rmapp = __gfn_to_rmap(gfn, i, slot); | 1201 | rmapp = __gfn_to_rmap(gfn, i, slot); |
| 1202 | write_protected |= __rmap_write_protect(kvm, rmapp, i, true); | 1202 | write_protected |= __rmap_write_protect(kvm, rmapp, true); |
| 1203 | } | 1203 | } |
| 1204 | 1204 | ||
| 1205 | return write_protected; | 1205 | return write_protected; |
| @@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) | |||
| 1460 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); | 1460 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); |
| 1461 | } | 1461 | } |
| 1462 | 1462 | ||
| 1463 | /* | 1463 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) |
| 1464 | * Remove the sp from shadow page cache, after call it, | ||
| 1465 | * we can not find this sp from the cache, and the shadow | ||
| 1466 | * page table is still valid. | ||
| 1467 | * It should be under the protection of mmu lock. | ||
| 1468 | */ | ||
| 1469 | static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp) | ||
| 1470 | { | 1464 | { |
| 1471 | ASSERT(is_empty_shadow_page(sp->spt)); | 1465 | ASSERT(is_empty_shadow_page(sp->spt)); |
| 1472 | hlist_del(&sp->hash_link); | 1466 | hlist_del(&sp->hash_link); |
| 1473 | if (!sp->role.direct) | ||
| 1474 | free_page((unsigned long)sp->gfns); | ||
| 1475 | } | ||
| 1476 | |||
| 1477 | /* | ||
| 1478 | * Free the shadow page table and the sp, we can do it | ||
| 1479 | * out of the protection of mmu lock. | ||
| 1480 | */ | ||
| 1481 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) | ||
| 1482 | { | ||
| 1483 | list_del(&sp->link); | 1467 | list_del(&sp->link); |
| 1484 | free_page((unsigned long)sp->spt); | 1468 | free_page((unsigned long)sp->spt); |
| 1469 | if (!sp->role.direct) | ||
| 1470 | free_page((unsigned long)sp->gfns); | ||
| 1485 | kmem_cache_free(mmu_page_header_cache, sp); | 1471 | kmem_cache_free(mmu_page_header_cache, sp); |
| 1486 | } | 1472 | } |
| 1487 | 1473 | ||
| @@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
| 1522 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 1508 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
| 1523 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1509 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
| 1524 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1510 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
| 1525 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); | ||
| 1526 | sp->parent_ptes = 0; | 1511 | sp->parent_ptes = 0; |
| 1527 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1512 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1528 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 1513 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
| @@ -1659,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 1659 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1644 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
| 1660 | struct list_head *invalid_list); | 1645 | struct list_head *invalid_list); |
| 1661 | 1646 | ||
| 1662 | #define for_each_gfn_sp(kvm, sp, gfn, pos) \ | 1647 | #define for_each_gfn_sp(kvm, sp, gfn) \ |
| 1663 | hlist_for_each_entry(sp, pos, \ | 1648 | hlist_for_each_entry(sp, \ |
| 1664 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1649 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ |
| 1665 | if ((sp)->gfn != (gfn)) {} else | 1650 | if ((sp)->gfn != (gfn)) {} else |
| 1666 | 1651 | ||
| 1667 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ | 1652 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ |
| 1668 | hlist_for_each_entry(sp, pos, \ | 1653 | hlist_for_each_entry(sp, \ |
| 1669 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1654 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ |
| 1670 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | 1655 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ |
| 1671 | (sp)->role.invalid) {} else | 1656 | (sp)->role.invalid) {} else |
| @@ -1721,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 1721 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 1706 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) |
| 1722 | { | 1707 | { |
| 1723 | struct kvm_mmu_page *s; | 1708 | struct kvm_mmu_page *s; |
| 1724 | struct hlist_node *node; | ||
| 1725 | LIST_HEAD(invalid_list); | 1709 | LIST_HEAD(invalid_list); |
| 1726 | bool flush = false; | 1710 | bool flush = false; |
| 1727 | 1711 | ||
| 1728 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | 1712 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { |
| 1729 | if (!s->unsync) | 1713 | if (!s->unsync) |
| 1730 | continue; | 1714 | continue; |
| 1731 | 1715 | ||
| @@ -1863,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1863 | union kvm_mmu_page_role role; | 1847 | union kvm_mmu_page_role role; |
| 1864 | unsigned quadrant; | 1848 | unsigned quadrant; |
| 1865 | struct kvm_mmu_page *sp; | 1849 | struct kvm_mmu_page *sp; |
| 1866 | struct hlist_node *node; | ||
| 1867 | bool need_sync = false; | 1850 | bool need_sync = false; |
| 1868 | 1851 | ||
| 1869 | role = vcpu->arch.mmu.base_role; | 1852 | role = vcpu->arch.mmu.base_role; |
| @@ -1878,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1878 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 1861 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
| 1879 | role.quadrant = quadrant; | 1862 | role.quadrant = quadrant; |
| 1880 | } | 1863 | } |
| 1881 | for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { | 1864 | for_each_gfn_sp(vcpu->kvm, sp, gfn) { |
| 1882 | if (!need_sync && sp->unsync) | 1865 | if (!need_sync && sp->unsync) |
| 1883 | need_sync = true; | 1866 | need_sync = true; |
| 1884 | 1867 | ||
| @@ -1973,9 +1956,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | |||
| 1973 | { | 1956 | { |
| 1974 | u64 spte; | 1957 | u64 spte; |
| 1975 | 1958 | ||
| 1976 | spte = __pa(sp->spt) | 1959 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
| 1977 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 1960 | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; |
| 1978 | | PT_WRITABLE_MASK | PT_USER_MASK; | 1961 | |
| 1979 | mmu_spte_set(sptep, spte); | 1962 | mmu_spte_set(sptep, spte); |
| 1980 | } | 1963 | } |
| 1981 | 1964 | ||
| @@ -2126,7 +2109,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
| 2126 | do { | 2109 | do { |
| 2127 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 2110 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); |
| 2128 | WARN_ON(!sp->role.invalid || sp->root_count); | 2111 | WARN_ON(!sp->role.invalid || sp->root_count); |
| 2129 | kvm_mmu_isolate_page(sp); | ||
| 2130 | kvm_mmu_free_page(sp); | 2112 | kvm_mmu_free_page(sp); |
| 2131 | } while (!list_empty(invalid_list)); | 2113 | } while (!list_empty(invalid_list)); |
| 2132 | } | 2114 | } |
| @@ -2144,6 +2126,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
| 2144 | * change the value | 2126 | * change the value |
| 2145 | */ | 2127 | */ |
| 2146 | 2128 | ||
| 2129 | spin_lock(&kvm->mmu_lock); | ||
| 2130 | |||
| 2147 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { | 2131 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { |
| 2148 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && | 2132 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && |
| 2149 | !list_empty(&kvm->arch.active_mmu_pages)) { | 2133 | !list_empty(&kvm->arch.active_mmu_pages)) { |
| @@ -2158,19 +2142,20 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
| 2158 | } | 2142 | } |
| 2159 | 2143 | ||
| 2160 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; | 2144 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; |
| 2145 | |||
| 2146 | spin_unlock(&kvm->mmu_lock); | ||
| 2161 | } | 2147 | } |
| 2162 | 2148 | ||
| 2163 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 2149 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
| 2164 | { | 2150 | { |
| 2165 | struct kvm_mmu_page *sp; | 2151 | struct kvm_mmu_page *sp; |
| 2166 | struct hlist_node *node; | ||
| 2167 | LIST_HEAD(invalid_list); | 2152 | LIST_HEAD(invalid_list); |
| 2168 | int r; | 2153 | int r; |
| 2169 | 2154 | ||
| 2170 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); | 2155 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); |
| 2171 | r = 0; | 2156 | r = 0; |
| 2172 | spin_lock(&kvm->mmu_lock); | 2157 | spin_lock(&kvm->mmu_lock); |
| 2173 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2158 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { |
| 2174 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, | 2159 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, |
| 2175 | sp->role.word); | 2160 | sp->role.word); |
| 2176 | r = 1; | 2161 | r = 1; |
| @@ -2183,14 +2168,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 2183 | } | 2168 | } |
| 2184 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | 2169 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); |
| 2185 | 2170 | ||
| 2186 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | ||
| 2187 | { | ||
| 2188 | int slot = memslot_id(kvm, gfn); | ||
| 2189 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | ||
| 2190 | |||
| 2191 | __set_bit(slot, sp->slot_bitmap); | ||
| 2192 | } | ||
| 2193 | |||
| 2194 | /* | 2171 | /* |
| 2195 | * The function is based on mtrr_type_lookup() in | 2172 | * The function is based on mtrr_type_lookup() in |
| 2196 | * arch/x86/kernel/cpu/mtrr/generic.c | 2173 | * arch/x86/kernel/cpu/mtrr/generic.c |
| @@ -2308,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 2308 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 2285 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) |
| 2309 | { | 2286 | { |
| 2310 | struct kvm_mmu_page *s; | 2287 | struct kvm_mmu_page *s; |
| 2311 | struct hlist_node *node; | ||
| 2312 | 2288 | ||
| 2313 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | 2289 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { |
| 2314 | if (s->unsync) | 2290 | if (s->unsync) |
| 2315 | continue; | 2291 | continue; |
| 2316 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | 2292 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); |
| @@ -2322,19 +2298,17 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
| 2322 | bool can_unsync) | 2298 | bool can_unsync) |
| 2323 | { | 2299 | { |
| 2324 | struct kvm_mmu_page *s; | 2300 | struct kvm_mmu_page *s; |
| 2325 | struct hlist_node *node; | ||
| 2326 | bool need_unsync = false; | 2301 | bool need_unsync = false; |
| 2327 | 2302 | ||
| 2328 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | 2303 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { |
| 2329 | if (!can_unsync) | 2304 | if (!can_unsync) |
| 2330 | return 1; | 2305 | return 1; |
| 2331 | 2306 | ||
| 2332 | if (s->role.level != PT_PAGE_TABLE_LEVEL) | 2307 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
| 2333 | return 1; | 2308 | return 1; |
| 2334 | 2309 | ||
| 2335 | if (!need_unsync && !s->unsync) { | 2310 | if (!s->unsync) |
| 2336 | need_unsync = true; | 2311 | need_unsync = true; |
| 2337 | } | ||
| 2338 | } | 2312 | } |
| 2339 | if (need_unsync) | 2313 | if (need_unsync) |
| 2340 | kvm_unsync_pages(vcpu, gfn); | 2314 | kvm_unsync_pages(vcpu, gfn); |
| @@ -2342,8 +2316,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
| 2342 | } | 2316 | } |
| 2343 | 2317 | ||
| 2344 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 2318 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
| 2345 | unsigned pte_access, int user_fault, | 2319 | unsigned pte_access, int level, |
| 2346 | int write_fault, int level, | ||
| 2347 | gfn_t gfn, pfn_t pfn, bool speculative, | 2320 | gfn_t gfn, pfn_t pfn, bool speculative, |
| 2348 | bool can_unsync, bool host_writable) | 2321 | bool can_unsync, bool host_writable) |
| 2349 | { | 2322 | { |
| @@ -2378,20 +2351,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 2378 | 2351 | ||
| 2379 | spte |= (u64)pfn << PAGE_SHIFT; | 2352 | spte |= (u64)pfn << PAGE_SHIFT; |
| 2380 | 2353 | ||
| 2381 | if ((pte_access & ACC_WRITE_MASK) | 2354 | if (pte_access & ACC_WRITE_MASK) { |
| 2382 | || (!vcpu->arch.mmu.direct_map && write_fault | ||
| 2383 | && !is_write_protection(vcpu) && !user_fault)) { | ||
| 2384 | 2355 | ||
| 2385 | /* | 2356 | /* |
| 2386 | * There are two cases: | 2357 | * Other vcpu creates new sp in the window between |
| 2387 | * - the one is other vcpu creates new sp in the window | 2358 | * mapping_level() and acquiring mmu-lock. We can |
| 2388 | * between mapping_level() and acquiring mmu-lock. | 2359 | * allow guest to retry the access, the mapping can |
| 2389 | * - the another case is the new sp is created by itself | 2360 | * be fixed if guest refault. |
| 2390 | * (page-fault path) when guest uses the target gfn as | ||
| 2391 | * its page table. | ||
| 2392 | * Both of these cases can be fixed by allowing guest to | ||
| 2393 | * retry the access, it will refault, then we can establish | ||
| 2394 | * the mapping by using small page. | ||
| 2395 | */ | 2361 | */ |
| 2396 | if (level > PT_PAGE_TABLE_LEVEL && | 2362 | if (level > PT_PAGE_TABLE_LEVEL && |
| 2397 | has_wrprotected_page(vcpu->kvm, gfn, level)) | 2363 | has_wrprotected_page(vcpu->kvm, gfn, level)) |
| @@ -2399,19 +2365,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 2399 | 2365 | ||
| 2400 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; | 2366 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; |
| 2401 | 2367 | ||
| 2402 | if (!vcpu->arch.mmu.direct_map | ||
| 2403 | && !(pte_access & ACC_WRITE_MASK)) { | ||
| 2404 | spte &= ~PT_USER_MASK; | ||
| 2405 | /* | ||
| 2406 | * If we converted a user page to a kernel page, | ||
| 2407 | * so that the kernel can write to it when cr0.wp=0, | ||
| 2408 | * then we should prevent the kernel from executing it | ||
| 2409 | * if SMEP is enabled. | ||
| 2410 | */ | ||
| 2411 | if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
| 2412 | spte |= PT64_NX_MASK; | ||
| 2413 | } | ||
| 2414 | |||
| 2415 | /* | 2368 | /* |
| 2416 | * Optimization: for pte sync, if spte was writable the hash | 2369 | * Optimization: for pte sync, if spte was writable the hash |
| 2417 | * lookup is unnecessary (and expensive). Write protection | 2370 | * lookup is unnecessary (and expensive). Write protection |
| @@ -2441,19 +2394,15 @@ done: | |||
| 2441 | } | 2394 | } |
| 2442 | 2395 | ||
| 2443 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 2396 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
| 2444 | unsigned pt_access, unsigned pte_access, | 2397 | unsigned pte_access, int write_fault, int *emulate, |
| 2445 | int user_fault, int write_fault, | 2398 | int level, gfn_t gfn, pfn_t pfn, bool speculative, |
| 2446 | int *emulate, int level, gfn_t gfn, | ||
| 2447 | pfn_t pfn, bool speculative, | ||
| 2448 | bool host_writable) | 2399 | bool host_writable) |
| 2449 | { | 2400 | { |
| 2450 | int was_rmapped = 0; | 2401 | int was_rmapped = 0; |
| 2451 | int rmap_count; | 2402 | int rmap_count; |
| 2452 | 2403 | ||
| 2453 | pgprintk("%s: spte %llx access %x write_fault %d" | 2404 | pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, |
| 2454 | " user_fault %d gfn %llx\n", | 2405 | *sptep, write_fault, gfn); |
| 2455 | __func__, *sptep, pt_access, | ||
| 2456 | write_fault, user_fault, gfn); | ||
| 2457 | 2406 | ||
| 2458 | if (is_rmap_spte(*sptep)) { | 2407 | if (is_rmap_spte(*sptep)) { |
| 2459 | /* | 2408 | /* |
| @@ -2477,9 +2426,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 2477 | was_rmapped = 1; | 2426 | was_rmapped = 1; |
| 2478 | } | 2427 | } |
| 2479 | 2428 | ||
| 2480 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 2429 | if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, |
| 2481 | level, gfn, pfn, speculative, true, | 2430 | true, host_writable)) { |
| 2482 | host_writable)) { | ||
| 2483 | if (write_fault) | 2431 | if (write_fault) |
| 2484 | *emulate = 1; | 2432 | *emulate = 1; |
| 2485 | kvm_mmu_flush_tlb(vcpu); | 2433 | kvm_mmu_flush_tlb(vcpu); |
| @@ -2497,7 +2445,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 2497 | ++vcpu->kvm->stat.lpages; | 2445 | ++vcpu->kvm->stat.lpages; |
| 2498 | 2446 | ||
| 2499 | if (is_shadow_present_pte(*sptep)) { | 2447 | if (is_shadow_present_pte(*sptep)) { |
| 2500 | page_header_update_slot(vcpu->kvm, sptep, gfn); | ||
| 2501 | if (!was_rmapped) { | 2448 | if (!was_rmapped) { |
| 2502 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2449 | rmap_count = rmap_add(vcpu, sptep, gfn); |
| 2503 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2450 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
| @@ -2571,10 +2518,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
| 2571 | return -1; | 2518 | return -1; |
| 2572 | 2519 | ||
| 2573 | for (i = 0; i < ret; i++, gfn++, start++) | 2520 | for (i = 0; i < ret; i++, gfn++, start++) |
| 2574 | mmu_set_spte(vcpu, start, ACC_ALL, | 2521 | mmu_set_spte(vcpu, start, access, 0, NULL, |
| 2575 | access, 0, 0, NULL, | 2522 | sp->role.level, gfn, page_to_pfn(pages[i]), |
| 2576 | sp->role.level, gfn, | 2523 | true, true); |
| 2577 | page_to_pfn(pages[i]), true, true); | ||
| 2578 | 2524 | ||
| 2579 | return 0; | 2525 | return 0; |
| 2580 | } | 2526 | } |
| @@ -2633,11 +2579,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
| 2633 | 2579 | ||
| 2634 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2580 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
| 2635 | if (iterator.level == level) { | 2581 | if (iterator.level == level) { |
| 2636 | unsigned pte_access = ACC_ALL; | 2582 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, |
| 2637 | 2583 | write, &emulate, level, gfn, pfn, | |
| 2638 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, | 2584 | prefault, map_writable); |
| 2639 | 0, write, &emulate, | ||
| 2640 | level, gfn, pfn, prefault, map_writable); | ||
| 2641 | direct_pte_prefetch(vcpu, iterator.sptep); | 2585 | direct_pte_prefetch(vcpu, iterator.sptep); |
| 2642 | ++vcpu->stat.pf_fixed; | 2586 | ++vcpu->stat.pf_fixed; |
| 2643 | break; | 2587 | break; |
| @@ -2652,11 +2596,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
| 2652 | iterator.level - 1, | 2596 | iterator.level - 1, |
| 2653 | 1, ACC_ALL, iterator.sptep); | 2597 | 1, ACC_ALL, iterator.sptep); |
| 2654 | 2598 | ||
| 2655 | mmu_spte_set(iterator.sptep, | 2599 | link_shadow_page(iterator.sptep, sp); |
| 2656 | __pa(sp->spt) | ||
| 2657 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | ||
| 2658 | | shadow_user_mask | shadow_x_mask | ||
| 2659 | | shadow_accessed_mask); | ||
| 2660 | } | 2600 | } |
| 2661 | } | 2601 | } |
| 2662 | return emulate; | 2602 | return emulate; |
| @@ -3719,6 +3659,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
| 3719 | else | 3659 | else |
| 3720 | r = paging32_init_context(vcpu, context); | 3660 | r = paging32_init_context(vcpu, context); |
| 3721 | 3661 | ||
| 3662 | vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); | ||
| 3722 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 3663 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
| 3723 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 3664 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); |
| 3724 | vcpu->arch.mmu.base_role.smep_andnot_wp | 3665 | vcpu->arch.mmu.base_role.smep_andnot_wp |
| @@ -3885,7 +3826,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | |||
| 3885 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 3826 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
| 3886 | *gpa &= ~(gpa_t)7; | 3827 | *gpa &= ~(gpa_t)7; |
| 3887 | *bytes = 8; | 3828 | *bytes = 8; |
| 3888 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); | 3829 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8); |
| 3889 | if (r) | 3830 | if (r) |
| 3890 | gentry = 0; | 3831 | gentry = 0; |
| 3891 | new = (const u8 *)&gentry; | 3832 | new = (const u8 *)&gentry; |
| @@ -3987,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 3987 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3928 | gfn_t gfn = gpa >> PAGE_SHIFT; |
| 3988 | union kvm_mmu_page_role mask = { .word = 0 }; | 3929 | union kvm_mmu_page_role mask = { .word = 0 }; |
| 3989 | struct kvm_mmu_page *sp; | 3930 | struct kvm_mmu_page *sp; |
| 3990 | struct hlist_node *node; | ||
| 3991 | LIST_HEAD(invalid_list); | 3931 | LIST_HEAD(invalid_list); |
| 3992 | u64 entry, gentry, *spte; | 3932 | u64 entry, gentry, *spte; |
| 3993 | int npte; | 3933 | int npte; |
| @@ -4018,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 4018 | kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3958 | kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
| 4019 | 3959 | ||
| 4020 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; | 3960 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
| 4021 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 3961 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { |
| 4022 | if (detect_write_misaligned(sp, gpa, bytes) || | 3962 | if (detect_write_misaligned(sp, gpa, bytes) || |
| 4023 | detect_write_flooding(sp)) { | 3963 | detect_write_flooding(sp)) { |
| 4024 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3964 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
| @@ -4039,7 +3979,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 4039 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3979 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
| 4040 | & mask.word) && rmap_can_add(vcpu)) | 3980 | & mask.word) && rmap_can_add(vcpu)) |
| 4041 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3981 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
| 4042 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3982 | if (need_remote_flush(entry, *spte)) |
| 4043 | remote_flush = true; | 3983 | remote_flush = true; |
| 4044 | ++spte; | 3984 | ++spte; |
| 4045 | } | 3985 | } |
| @@ -4198,26 +4138,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
| 4198 | 4138 | ||
| 4199 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 4139 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
| 4200 | { | 4140 | { |
| 4201 | struct kvm_mmu_page *sp; | 4141 | struct kvm_memory_slot *memslot; |
| 4202 | bool flush = false; | 4142 | gfn_t last_gfn; |
| 4143 | int i; | ||
| 4203 | 4144 | ||
| 4204 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 4145 | memslot = id_to_memslot(kvm->memslots, slot); |
| 4205 | int i; | 4146 | last_gfn = memslot->base_gfn + memslot->npages - 1; |
| 4206 | u64 *pt; | ||
| 4207 | 4147 | ||
| 4208 | if (!test_bit(slot, sp->slot_bitmap)) | 4148 | spin_lock(&kvm->mmu_lock); |
| 4209 | continue; | ||
| 4210 | 4149 | ||
| 4211 | pt = sp->spt; | 4150 | for (i = PT_PAGE_TABLE_LEVEL; |
| 4212 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 4151 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 4213 | if (!is_shadow_present_pte(pt[i]) || | 4152 | unsigned long *rmapp; |
| 4214 | !is_last_spte(pt[i], sp->role.level)) | 4153 | unsigned long last_index, index; |
| 4215 | continue; | ||
| 4216 | 4154 | ||
| 4217 | spte_write_protect(kvm, &pt[i], &flush, false); | 4155 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; |
| 4156 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | ||
| 4157 | |||
| 4158 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
| 4159 | if (*rmapp) | ||
| 4160 | __rmap_write_protect(kvm, rmapp, false); | ||
| 4161 | |||
| 4162 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
| 4163 | kvm_flush_remote_tlbs(kvm); | ||
| 4164 | cond_resched_lock(&kvm->mmu_lock); | ||
| 4165 | } | ||
| 4218 | } | 4166 | } |
| 4219 | } | 4167 | } |
| 4168 | |||
| 4220 | kvm_flush_remote_tlbs(kvm); | 4169 | kvm_flush_remote_tlbs(kvm); |
| 4170 | spin_unlock(&kvm->mmu_lock); | ||
| 4221 | } | 4171 | } |
| 4222 | 4172 | ||
| 4223 | void kvm_mmu_zap_all(struct kvm *kvm) | 4173 | void kvm_mmu_zap_all(struct kvm *kvm) |
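kvm_mmu_slot_remove_write_access() now walks the memslot's per-level rmap arrays directly instead of scanning shadow pages by slot bitmap, dropping the lock periodically via cond_resched_lock(). The sketch below models only the index bounds of that walk, assuming the usual 9 bits of gfn per extra page-table level; it is an illustration, not the kernel helper.

    #include <stdint.h>
    #include <stdio.h>

    /* One rmap index per 4KiB/2MiB/1GiB frame, assuming 9 bits per level. */
    static uint64_t gfn_to_index(uint64_t gfn, uint64_t base_gfn, int level)
    {
        unsigned shift = (level - 1) * 9;          /* 0, 9, 18 */
        return (gfn >> shift) - (base_gfn >> shift);
    }

    int main(void)
    {
        uint64_t base_gfn = 0x100000, npages = 0x4000;
        uint64_t last_gfn = base_gfn + npages - 1;

        for (int level = 1; level <= 3; level++)
            printf("level %d: walk rmap indices 0..%llu\n", level,
                   (unsigned long long)gfn_to_index(last_gfn, base_gfn, level));
        return 0;
    }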
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index cd6e98333ba3..b8f6172f4174 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
| @@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, | |||
| 195 | TP_ARGS(sp) | 195 | TP_ARGS(sp) |
| 196 | ); | 196 | ); |
| 197 | 197 | ||
| 198 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages, | ||
| 199 | TP_PROTO(struct kvm_mmu_page *sp), | ||
| 200 | |||
| 201 | TP_ARGS(sp) | ||
| 202 | ); | ||
| 203 | |||
| 204 | TRACE_EVENT( | 198 | TRACE_EVENT( |
| 205 | mark_mmio_spte, | 199 | mark_mmio_spte, |
| 206 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), | 200 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 891eb6d93b8b..105dd5bd550e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
| 151 | pt_element_t pte; | 151 | pt_element_t pte; |
| 152 | pt_element_t __user *uninitialized_var(ptep_user); | 152 | pt_element_t __user *uninitialized_var(ptep_user); |
| 153 | gfn_t table_gfn; | 153 | gfn_t table_gfn; |
| 154 | unsigned index, pt_access, pte_access, accessed_dirty, shift; | 154 | unsigned index, pt_access, pte_access, accessed_dirty; |
| 155 | gpa_t pte_gpa; | 155 | gpa_t pte_gpa; |
| 156 | int offset; | 156 | int offset; |
| 157 | const int write_fault = access & PFERR_WRITE_MASK; | 157 | const int write_fault = access & PFERR_WRITE_MASK; |
| @@ -249,16 +249,12 @@ retry_walk: | |||
| 249 | 249 | ||
| 250 | if (!write_fault) | 250 | if (!write_fault) |
| 251 | protect_clean_gpte(&pte_access, pte); | 251 | protect_clean_gpte(&pte_access, pte); |
| 252 | 252 | else | |
| 253 | /* | 253 | /* |
| 254 | * On a write fault, fold the dirty bit into accessed_dirty by shifting it one | 254 | * On a write fault, fold the dirty bit into accessed_dirty by |
| 255 | * place right. | 255 | * shifting it one place right. |
| 256 | * | 256 | */ |
| 257 | * On a read fault, do nothing. | 257 | accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); |
| 258 | */ | ||
| 259 | shift = write_fault >> ilog2(PFERR_WRITE_MASK); | ||
| 260 | shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; | ||
| 261 | accessed_dirty &= pte >> shift; | ||
| 262 | 258 | ||
| 263 | if (unlikely(!accessed_dirty)) { | 259 | if (unlikely(!accessed_dirty)) { |
| 264 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); | 260 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
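The walker change above folds the dirty bit into accessed_dirty on a write fault with a single shift, since on x86 the Accessed bit is bit 5 and the Dirty bit is bit 6 of a PTE. A standalone check of that fold:

    #include <assert.h>
    #include <stdint.h>

    #define PT_ACCESSED_SHIFT 5
    #define PT_DIRTY_SHIFT    6
    #define PT_ACCESSED_MASK  (1ull << PT_ACCESSED_SHIFT)
    #define PT_DIRTY_MASK     (1ull << PT_DIRTY_SHIFT)

    int main(void)
    {
        uint64_t pte = PT_ACCESSED_MASK | PT_DIRTY_MASK;
        uint64_t accessed_dirty = PT_ACCESSED_MASK;

        /* Write fault: only treat A as set if D is also set. */
        accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);
        assert(accessed_dirty == PT_ACCESSED_MASK);

        pte &= ~PT_DIRTY_MASK;                        /* dirty not yet set ... */
        accessed_dirty = PT_ACCESSED_MASK & (pte >> 1);
        assert(accessed_dirty == 0);                  /* ... forces the slow path */
        return 0;
    }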
| @@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 330 | * we call mmu_set_spte() with host_writable = true because | 326 | * we call mmu_set_spte() with host_writable = true because |
| 331 | * pte_prefetch_gfn_to_pfn always gets a writable pfn. | 327 | * pte_prefetch_gfn_to_pfn always gets a writable pfn. |
| 332 | */ | 328 | */ |
| 333 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 329 | mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, |
| 334 | NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); | 330 | gfn, pfn, true, true); |
| 335 | 331 | ||
| 336 | return true; | 332 | return true; |
| 337 | } | 333 | } |
| @@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
| 405 | */ | 401 | */ |
| 406 | static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 402 | static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
| 407 | struct guest_walker *gw, | 403 | struct guest_walker *gw, |
| 408 | int user_fault, int write_fault, int hlevel, | 404 | int write_fault, int hlevel, |
| 409 | pfn_t pfn, bool map_writable, bool prefault) | 405 | pfn_t pfn, bool map_writable, bool prefault) |
| 410 | { | 406 | { |
| 411 | struct kvm_mmu_page *sp = NULL; | 407 | struct kvm_mmu_page *sp = NULL; |
| @@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 413 | unsigned direct_access, access = gw->pt_access; | 409 | unsigned direct_access, access = gw->pt_access; |
| 414 | int top_level, emulate = 0; | 410 | int top_level, emulate = 0; |
| 415 | 411 | ||
| 416 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | ||
| 417 | return 0; | ||
| 418 | |||
| 419 | direct_access = gw->pte_access; | 412 | direct_access = gw->pte_access; |
| 420 | 413 | ||
| 421 | top_level = vcpu->arch.mmu.root_level; | 414 | top_level = vcpu->arch.mmu.root_level; |
| @@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 477 | } | 470 | } |
| 478 | 471 | ||
| 479 | clear_sp_write_flooding_count(it.sptep); | 472 | clear_sp_write_flooding_count(it.sptep); |
| 480 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, | 473 | mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, |
| 481 | user_fault, write_fault, &emulate, it.level, | 474 | it.level, gw->gfn, pfn, prefault, map_writable); |
| 482 | gw->gfn, pfn, prefault, map_writable); | ||
| 483 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); | 475 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); |
| 484 | 476 | ||
| 485 | return emulate; | 477 | return emulate; |
| @@ -491,6 +483,46 @@ out_gpte_changed: | |||
| 491 | return 0; | 483 | return 0; |
| 492 | } | 484 | } |
| 493 | 485 | ||
| 486 | /* | ||
| 487 | * Check whether the mapped gfn can write its own page table in the | ||
| 488 | * current mapping. | ||
| 489 | * | ||
| 490 | * This is a helper for FNAME(page_fault). When the guest uses a large | ||
| 491 | * page to map a writable gfn that is also in use as a page table, kvm | ||
| 492 | * should be forced to map it with a small page, because the new shadow | ||
| 493 | * page created when kvm builds the shadow page table prevents the large | ||
| 494 | * mapping anyway. Doing this early avoids unnecessary #PFs and emulation. | ||
| 495 | * | ||
| 496 | * @write_fault_to_shadow_pgtable is set to true if the faulting gfn is | ||
| 497 | * currently used as a page table of the walk. | ||
| 498 | * | ||
| 499 | * Note: the PDPT page table is not checked for PAE 32-bit guests. That | ||
| 500 | * is fine because the PDPT is always shadowed, so a large page can never | ||
| 501 | * be used to map a gfn that serves as a PDPT. | ||
| 502 | */ | ||
| 503 | static bool | ||
| 504 | FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, | ||
| 505 | struct guest_walker *walker, int user_fault, | ||
| 506 | bool *write_fault_to_shadow_pgtable) | ||
| 507 | { | ||
| 508 | int level; | ||
| 509 | gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); | ||
| 510 | bool self_changed = false; | ||
| 511 | |||
| 512 | if (!(walker->pte_access & ACC_WRITE_MASK || | ||
| 513 | (!is_write_protection(vcpu) && !user_fault))) | ||
| 514 | return false; | ||
| 515 | |||
| 516 | for (level = walker->level; level <= walker->max_level; level++) { | ||
| 517 | gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; | ||
| 518 | |||
| 519 | self_changed |= !(gfn & mask); | ||
| 520 | *write_fault_to_shadow_pgtable |= !gfn; | ||
| 521 | } | ||
| 522 | |||
| 523 | return self_changed; | ||
| 524 | } | ||
| 525 | |||
| 494 | /* | 526 | /* |
| 495 | * Page fault handler. There are several causes for a page fault: | 527 | * Page fault handler. There are several causes for a page fault: |
| 496 | * - there is no shadow pte for the guest pte | 528 | * - there is no shadow pte for the guest pte |
| @@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 516 | int level = PT_PAGE_TABLE_LEVEL; | 548 | int level = PT_PAGE_TABLE_LEVEL; |
| 517 | int force_pt_level; | 549 | int force_pt_level; |
| 518 | unsigned long mmu_seq; | 550 | unsigned long mmu_seq; |
| 519 | bool map_writable; | 551 | bool map_writable, is_self_change_mapping; |
| 520 | 552 | ||
| 521 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 553 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
| 522 | 554 | ||
| @@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 544 | return 0; | 576 | return 0; |
| 545 | } | 577 | } |
| 546 | 578 | ||
| 579 | vcpu->arch.write_fault_to_shadow_pgtable = false; | ||
| 580 | |||
| 581 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, | ||
| 582 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); | ||
| 583 | |||
| 547 | if (walker.level >= PT_DIRECTORY_LEVEL) | 584 | if (walker.level >= PT_DIRECTORY_LEVEL) |
| 548 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); | 585 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) |
| 586 | || is_self_change_mapping; | ||
| 549 | else | 587 | else |
| 550 | force_pt_level = 1; | 588 | force_pt_level = 1; |
| 551 | if (!force_pt_level) { | 589 | if (!force_pt_level) { |
| @@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 564 | walker.gfn, pfn, walker.pte_access, &r)) | 602 | walker.gfn, pfn, walker.pte_access, &r)) |
| 565 | return r; | 603 | return r; |
| 566 | 604 | ||
| 605 | /* | ||
| 606 | * Do not change pte_access if the pfn is an mmio page, otherwise | ||
| 607 | * we would cache an incorrect access into the mmio spte. | ||
| 608 | */ | ||
| 609 | if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) && | ||
| 610 | !is_write_protection(vcpu) && !user_fault && | ||
| 611 | !is_noslot_pfn(pfn)) { | ||
| 612 | walker.pte_access |= ACC_WRITE_MASK; | ||
| 613 | walker.pte_access &= ~ACC_USER_MASK; | ||
| 614 | |||
| 615 | /* | ||
| 616 | * If we converted a user page to a kernel page so that | ||
| 617 | * the kernel can write to it when cr0.wp=0, we should | ||
| 618 | * prevent the kernel from executing it | ||
| 619 | * if SMEP is enabled. | ||
| 620 | */ | ||
| 621 | if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
| 622 | walker.pte_access &= ~ACC_EXEC_MASK; | ||
| 623 | } | ||
| 624 | |||
| 567 | spin_lock(&vcpu->kvm->mmu_lock); | 625 | spin_lock(&vcpu->kvm->mmu_lock); |
| 568 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 626 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
| 569 | goto out_unlock; | 627 | goto out_unlock; |
| @@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 572 | kvm_mmu_free_some_pages(vcpu); | 630 | kvm_mmu_free_some_pages(vcpu); |
| 573 | if (!force_pt_level) | 631 | if (!force_pt_level) |
| 574 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
| 575 | r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
| 576 | level, pfn, map_writable, prefault); | 634 | level, pfn, map_writable, prefault); |
| 577 | ++vcpu->stat.pf_fixed; | 635 | ++vcpu->stat.pf_fixed; |
| 578 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 636 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
| @@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 747 | 805 | ||
| 748 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; | 806 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; |
| 749 | 807 | ||
| 750 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 808 | set_spte(vcpu, &sp->spt[i], pte_access, |
| 751 | PT_PAGE_TABLE_LEVEL, gfn, | 809 | PT_PAGE_TABLE_LEVEL, gfn, |
| 752 | spte_to_pfn(sp->spt[i]), true, false, | 810 | spte_to_pfn(sp->spt[i]), true, false, |
| 753 | host_writable); | 811 | host_writable); |
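FNAME(is_self_change_mapping)() above compares the faulting gfn against every table gfn of the walk: a match within the same huge-page frame forces a small-page mapping, and an exact match flags write_fault_to_shadow_pgtable. The standalone model below fixes the frame size at 512 pages (2MiB) purely for illustration, whereas the kernel derives the mask from walker->level.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGES_PER_2M_HPAGE 512ull

    static bool is_self_change(uint64_t fault_gfn, const uint64_t *table_gfn,
                               int levels, bool *fault_on_table)
    {
        uint64_t mask = ~(PAGES_PER_2M_HPAGE - 1);
        bool self = false;

        for (int l = 0; l < levels; l++) {
            uint64_t diff = fault_gfn ^ table_gfn[l];

            self |= !(diff & mask);         /* same huge-page frame        */
            *fault_on_table |= !diff;       /* exactly the page-table gfn  */
        }
        return self;
    }

    int main(void)
    {
        uint64_t tables[2] = { 0x1000, 0x11ff };   /* walker->table_gfn[] */
        bool on_table = false;

        printf("self=%d on_table=%d\n",
               is_self_change(0x1005, tables, 2, &on_table), on_table);
        return 0;
    }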
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d29d3cd1c156..e1b1ce21bc00 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
| 3571 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); | 3571 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
| 3572 | } | 3572 | } |
| 3573 | 3573 | ||
| 3574 | static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
| 3575 | { | ||
| 3576 | return; | ||
| 3577 | } | ||
| 3578 | |||
| 3579 | static int svm_vm_has_apicv(struct kvm *kvm) | ||
| 3580 | { | ||
| 3581 | return 0; | ||
| 3582 | } | ||
| 3583 | |||
| 3584 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
| 3585 | { | ||
| 3586 | return; | ||
| 3587 | } | ||
| 3588 | |||
| 3589 | static void svm_hwapic_isr_update(struct kvm *kvm, int isr) | ||
| 3590 | { | ||
| 3591 | return; | ||
| 3592 | } | ||
| 3593 | |||
| 3574 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3594 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
| 3575 | { | 3595 | { |
| 3576 | struct vcpu_svm *svm = to_svm(vcpu); | 3596 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 4290 | .enable_nmi_window = enable_nmi_window, | 4310 | .enable_nmi_window = enable_nmi_window, |
| 4291 | .enable_irq_window = enable_irq_window, | 4311 | .enable_irq_window = enable_irq_window, |
| 4292 | .update_cr8_intercept = update_cr8_intercept, | 4312 | .update_cr8_intercept = update_cr8_intercept, |
| 4313 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, | ||
| 4314 | .vm_has_apicv = svm_vm_has_apicv, | ||
| 4315 | .load_eoi_exitmap = svm_load_eoi_exitmap, | ||
| 4316 | .hwapic_isr_update = svm_hwapic_isr_update, | ||
| 4293 | 4317 | ||
| 4294 | .set_tss_addr = svm_set_tss_addr, | 4318 | .set_tss_addr = svm_set_tss_addr, |
| 4295 | .get_tdp_level = get_npt_level, | 4319 | .get_tdp_level = get_npt_level, |
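The SVM side only has to provide no-op implementations of the new APICv callbacks so that common code can call through kvm_x86_ops unconditionally. A minimal standalone sketch of that ops-table pattern, with invented names:

    #include <stdio.h>

    struct apicv_ops {
        int  (*vm_has_apicv)(void);
        void (*hwapic_isr_update)(int isr);
    };

    static int  stub_vm_has_apicv(void)         { return 0; }
    static void stub_hwapic_isr_update(int isr) { (void)isr; }

    /* A backend without APICv wires in no-op callbacks. */
    static const struct apicv_ops svm_like_ops = {
        .vm_has_apicv      = stub_vm_has_apicv,
        .hwapic_isr_update = stub_hwapic_isr_update,
    };

    int main(void)
    {
        /* Common code never needs a NULL check before calling back. */
        if (!svm_like_ops.vm_has_apicv())
            printf("APICv not available; using legacy injection\n");
        svm_like_ops.hwapic_isr_update(-1);
        return 0;
    }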
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9120ae1901e4..6667042714cc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
| 84 | static bool __read_mostly fasteoi = 1; | 84 | static bool __read_mostly fasteoi = 1; |
| 85 | module_param(fasteoi, bool, S_IRUGO); | 85 | module_param(fasteoi, bool, S_IRUGO); |
| 86 | 86 | ||
| 87 | static bool __read_mostly enable_apicv_reg_vid; | ||
| 88 | |||
| 87 | /* | 89 | /* |
| 88 | * If nested=1, nested virtualization is supported, i.e., guests may use | 90 | * If nested=1, nested virtualization is supported, i.e., guests may use |
| 89 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 91 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
| @@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO); | |||
| 92 | static bool __read_mostly nested = 0; | 94 | static bool __read_mostly nested = 0; |
| 93 | module_param(nested, bool, S_IRUGO); | 95 | module_param(nested, bool, S_IRUGO); |
| 94 | 96 | ||
| 95 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 97 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) |
| 96 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 98 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) |
| 97 | #define KVM_GUEST_CR0_MASK \ | ||
| 98 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
| 99 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
| 100 | (X86_CR0_WP | X86_CR0_NE) | ||
| 101 | #define KVM_VM_CR0_ALWAYS_ON \ | 99 | #define KVM_VM_CR0_ALWAYS_ON \ |
| 102 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | 100 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) |
| 103 | #define KVM_CR4_GUEST_OWNED_BITS \ | 101 | #define KVM_CR4_GUEST_OWNED_BITS \ |
| @@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 624 | struct kvm_segment *var, int seg); | 622 | struct kvm_segment *var, int seg); |
| 625 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | 623 | static void vmx_get_segment(struct kvm_vcpu *vcpu, |
| 626 | struct kvm_segment *var, int seg); | 624 | struct kvm_segment *var, int seg); |
| 625 | static bool guest_state_valid(struct kvm_vcpu *vcpu); | ||
| 626 | static u32 vmx_segment_access_rights(struct kvm_segment *var); | ||
| 627 | 627 | ||
| 628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
| 629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
| @@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a; | |||
| 638 | static unsigned long *vmx_io_bitmap_b; | 638 | static unsigned long *vmx_io_bitmap_b; |
| 639 | static unsigned long *vmx_msr_bitmap_legacy; | 639 | static unsigned long *vmx_msr_bitmap_legacy; |
| 640 | static unsigned long *vmx_msr_bitmap_longmode; | 640 | static unsigned long *vmx_msr_bitmap_longmode; |
| 641 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | ||
| 642 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | ||
| 641 | 643 | ||
| 642 | static bool cpu_has_load_ia32_efer; | 644 | static bool cpu_has_load_ia32_efer; |
| 643 | static bool cpu_has_load_perf_global_ctrl; | 645 | static bool cpu_has_load_perf_global_ctrl; |
| @@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | |||
| 762 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 764 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
| 763 | } | 765 | } |
| 764 | 766 | ||
| 767 | static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) | ||
| 768 | { | ||
| 769 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 770 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
| 771 | } | ||
| 772 | |||
| 773 | static inline bool cpu_has_vmx_apic_register_virt(void) | ||
| 774 | { | ||
| 775 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 776 | SECONDARY_EXEC_APIC_REGISTER_VIRT; | ||
| 777 | } | ||
| 778 | |||
| 779 | static inline bool cpu_has_vmx_virtual_intr_delivery(void) | ||
| 780 | { | ||
| 781 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 782 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
| 783 | } | ||
| 784 | |||
| 765 | static inline bool cpu_has_vmx_flexpriority(void) | 785 | static inline bool cpu_has_vmx_flexpriority(void) |
| 766 | { | 786 | { |
| 767 | return cpu_has_vmx_tpr_shadow() && | 787 | return cpu_has_vmx_tpr_shadow() && |
| @@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
| 1694 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1714 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
| 1695 | { | 1715 | { |
| 1696 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); | 1716 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); |
| 1697 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
| 1698 | to_vmx(vcpu)->rflags = rflags; | 1717 | to_vmx(vcpu)->rflags = rflags; |
| 1699 | if (to_vmx(vcpu)->rmode.vm86_active) { | 1718 | if (to_vmx(vcpu)->rmode.vm86_active) { |
| 1700 | to_vmx(vcpu)->rmode.save_rflags = rflags; | 1719 | to_vmx(vcpu)->rmode.save_rflags = rflags; |
| @@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
| 1820 | vmx->guest_msrs[from] = tmp; | 1839 | vmx->guest_msrs[from] = tmp; |
| 1821 | } | 1840 | } |
| 1822 | 1841 | ||
| 1842 | static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) | ||
| 1843 | { | ||
| 1844 | unsigned long *msr_bitmap; | ||
| 1845 | |||
| 1846 | if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) { | ||
| 1847 | if (is_long_mode(vcpu)) | ||
| 1848 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | ||
| 1849 | else | ||
| 1850 | msr_bitmap = vmx_msr_bitmap_legacy_x2apic; | ||
| 1851 | } else { | ||
| 1852 | if (is_long_mode(vcpu)) | ||
| 1853 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
| 1854 | else | ||
| 1855 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
| 1856 | } | ||
| 1857 | |||
| 1858 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
| 1859 | } | ||
| 1860 | |||
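As a reading aid, the new vmx_set_msr_bitmap() helper above reduces to a two-by-two choice between the four MSR bitmap pages, keyed on whether the local APIC is in x2APIC mode and whether the vcpu is in long mode. The following is a minimal userspace sketch of that selection only; the placeholder arrays stand in for the module's real page allocations and pick_msr_bitmap() is an illustrative name, not a kernel function.

#include <stdbool.h>
#include <stdio.h>

/* Placeholder bitmaps standing in for the four pages the module allocates. */
static unsigned long legacy[512], legacy_x2apic[512];
static unsigned long longmode[512], longmode_x2apic[512];

/* Model of the selection: x2APIC state picks the column, long mode the row. */
static unsigned long *pick_msr_bitmap(bool x2apic, bool long_mode)
{
	if (x2apic)
		return long_mode ? longmode_x2apic : legacy_x2apic;
	return long_mode ? longmode : legacy;
}

int main(void)
{
	printf("x2apic + long mode -> %p\n", (void *)pick_msr_bitmap(true, true));
	printf("legacy, 32-bit     -> %p\n", (void *)pick_msr_bitmap(false, false));
	return 0;
}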
| 1823 | /* | 1861 | /* |
| 1824 | * Set up the vmcs to automatically save and restore system | 1862 | * Set up the vmcs to automatically save and restore system |
| 1825 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | 1863 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
| @@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
| 1828 | static void setup_msrs(struct vcpu_vmx *vmx) | 1866 | static void setup_msrs(struct vcpu_vmx *vmx) |
| 1829 | { | 1867 | { |
| 1830 | int save_nmsrs, index; | 1868 | int save_nmsrs, index; |
| 1831 | unsigned long *msr_bitmap; | ||
| 1832 | 1869 | ||
| 1833 | save_nmsrs = 0; | 1870 | save_nmsrs = 0; |
| 1834 | #ifdef CONFIG_X86_64 | 1871 | #ifdef CONFIG_X86_64 |
| @@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
| 1860 | 1897 | ||
| 1861 | vmx->save_nmsrs = save_nmsrs; | 1898 | vmx->save_nmsrs = save_nmsrs; |
| 1862 | 1899 | ||
| 1863 | if (cpu_has_vmx_msr_bitmap()) { | 1900 | if (cpu_has_vmx_msr_bitmap()) |
| 1864 | if (is_long_mode(&vmx->vcpu)) | 1901 | vmx_set_msr_bitmap(&vmx->vcpu); |
| 1865 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
| 1866 | else | ||
| 1867 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
| 1868 | |||
| 1869 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
| 1870 | } | ||
| 1871 | } | 1902 | } |
| 1872 | 1903 | ||
| 1873 | /* | 1904 | /* |
| @@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 2533 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { | 2564 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { |
| 2534 | min2 = 0; | 2565 | min2 = 0; |
| 2535 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2566 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
| 2567 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
| 2536 | SECONDARY_EXEC_WBINVD_EXITING | | 2568 | SECONDARY_EXEC_WBINVD_EXITING | |
| 2537 | SECONDARY_EXEC_ENABLE_VPID | | 2569 | SECONDARY_EXEC_ENABLE_VPID | |
| 2538 | SECONDARY_EXEC_ENABLE_EPT | | 2570 | SECONDARY_EXEC_ENABLE_EPT | |
| 2539 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 2571 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
| 2540 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | 2572 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
| 2541 | SECONDARY_EXEC_RDTSCP | | 2573 | SECONDARY_EXEC_RDTSCP | |
| 2542 | SECONDARY_EXEC_ENABLE_INVPCID; | 2574 | SECONDARY_EXEC_ENABLE_INVPCID | |
| 2575 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
| 2576 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
| 2543 | if (adjust_vmx_controls(min2, opt2, | 2577 | if (adjust_vmx_controls(min2, opt2, |
| 2544 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2578 | MSR_IA32_VMX_PROCBASED_CTLS2, |
| 2545 | &_cpu_based_2nd_exec_control) < 0) | 2579 | &_cpu_based_2nd_exec_control) < 0) |
| @@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 2550 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | 2584 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) |
| 2551 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 2585 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
| 2552 | #endif | 2586 | #endif |
| 2587 | |||
| 2588 | if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) | ||
| 2589 | _cpu_based_2nd_exec_control &= ~( | ||
| 2590 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
| 2591 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
| 2592 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
| 2593 | |||
| 2553 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | 2594 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { |
| 2554 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT | 2595 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT |
| 2555 | enabled */ | 2596 | enabled */ |
| @@ -2747,6 +2788,15 @@ static __init int hardware_setup(void) | |||
| 2747 | if (!cpu_has_vmx_ple()) | 2788 | if (!cpu_has_vmx_ple()) |
| 2748 | ple_gap = 0; | 2789 | ple_gap = 0; |
| 2749 | 2790 | ||
| 2791 | if (!cpu_has_vmx_apic_register_virt() || | ||
| 2792 | !cpu_has_vmx_virtual_intr_delivery()) | ||
| 2793 | enable_apicv_reg_vid = 0; | ||
| 2794 | |||
| 2795 | if (enable_apicv_reg_vid) | ||
| 2796 | kvm_x86_ops->update_cr8_intercept = NULL; | ||
| 2797 | else | ||
| 2798 | kvm_x86_ops->hwapic_irr_update = NULL; | ||
| 2799 | |||
| 2750 | if (nested) | 2800 | if (nested) |
| 2751 | nested_vmx_setup_ctls_msrs(); | 2801 | nested_vmx_setup_ctls_msrs(); |
| 2752 | 2802 | ||
| @@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void) | |||
| 2758 | free_kvm_area(); | 2808 | free_kvm_area(); |
| 2759 | } | 2809 | } |
| 2760 | 2810 | ||
| 2761 | static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) | 2811 | static bool emulation_required(struct kvm_vcpu *vcpu) |
| 2762 | { | 2812 | { |
| 2763 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2813 | return emulate_invalid_guest_state && !guest_state_valid(vcpu); |
| 2764 | struct kvm_segment tmp = *save; | 2814 | } |
| 2765 | 2815 | ||
| 2766 | if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { | 2816 | static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, |
| 2767 | tmp.base = vmcs_readl(sf->base); | 2817 | struct kvm_segment *save) |
| 2768 | tmp.selector = vmcs_read16(sf->selector); | 2818 | { |
| 2769 | tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; | 2819 | if (!emulate_invalid_guest_state) { |
| 2770 | tmp.s = 1; | 2820 | /* |
| 2821 | * CS and SS RPL should be equal during guest entry according | ||
| 2822 | * to VMX spec, but in reality it is not always so. Since vcpu | ||
| 2823 | * is in the middle of the transition from real mode to | ||
| 2824 | * protected mode it is safe to assume that RPL 0 is a good | ||
| 2825 | * default value. | ||
| 2826 | */ | ||
| 2827 | if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) | ||
| 2828 | save->selector &= ~SELECTOR_RPL_MASK; | ||
| 2829 | save->dpl = save->selector & SELECTOR_RPL_MASK; | ||
| 2830 | save->s = 1; | ||
| 2771 | } | 2831 | } |
| 2772 | vmx_set_segment(vcpu, &tmp, seg); | 2832 | vmx_set_segment(vcpu, save, seg); |
| 2773 | } | 2833 | } |
| 2774 | 2834 | ||
| 2775 | static void enter_pmode(struct kvm_vcpu *vcpu) | 2835 | static void enter_pmode(struct kvm_vcpu *vcpu) |
| @@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
| 2777 | unsigned long flags; | 2837 | unsigned long flags; |
| 2778 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2838 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2779 | 2839 | ||
| 2780 | vmx->emulation_required = 1; | 2840 | /* |
| 2841 | * Update real mode segment cache. It may not be up to date if a segment | ||
| 2842 | * register was written while the vcpu was in guest mode. | ||
| 2843 | */ | ||
| 2844 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); | ||
| 2845 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); | ||
| 2846 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); | ||
| 2847 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); | ||
| 2848 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); | ||
| 2849 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); | ||
| 2850 | |||
| 2781 | vmx->rmode.vm86_active = 0; | 2851 | vmx->rmode.vm86_active = 0; |
| 2782 | 2852 | ||
| 2783 | vmx_segment_cache_clear(vmx); | 2853 | vmx_segment_cache_clear(vmx); |
| @@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
| 2794 | 2864 | ||
| 2795 | update_exception_bitmap(vcpu); | 2865 | update_exception_bitmap(vcpu); |
| 2796 | 2866 | ||
| 2797 | if (emulate_invalid_guest_state) | 2867 | fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); |
| 2798 | return; | 2868 | fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); |
| 2799 | 2869 | fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); | |
| 2800 | fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); | 2870 | fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); |
| 2801 | fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); | 2871 | fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); |
| 2802 | fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); | 2872 | fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); |
| 2803 | fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); | ||
| 2804 | |||
| 2805 | vmx_segment_cache_clear(vmx); | ||
| 2806 | 2873 | ||
| 2807 | vmcs_write16(GUEST_SS_SELECTOR, 0); | 2874 | /* CPL is always 0 when CPU enters protected mode */ |
| 2808 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); | 2875 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
| 2809 | 2876 | vmx->cpl = 0; | |
| 2810 | vmcs_write16(GUEST_CS_SELECTOR, | ||
| 2811 | vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK); | ||
| 2812 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); | ||
| 2813 | } | 2877 | } |
| 2814 | 2878 | ||
| 2815 | static gva_t rmode_tss_base(struct kvm *kvm) | 2879 | static gva_t rmode_tss_base(struct kvm *kvm) |
| @@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
| 2831 | static void fix_rmode_seg(int seg, struct kvm_segment *save) | 2895 | static void fix_rmode_seg(int seg, struct kvm_segment *save) |
| 2832 | { | 2896 | { |
| 2833 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2897 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
| 2834 | 2898 | struct kvm_segment var = *save; | |
| 2835 | vmcs_write16(sf->selector, save->base >> 4); | 2899 | |
| 2836 | vmcs_write32(sf->base, save->base & 0xffff0); | 2900 | var.dpl = 0x3; |
| 2837 | vmcs_write32(sf->limit, 0xffff); | 2901 | if (seg == VCPU_SREG_CS) |
| 2838 | vmcs_write32(sf->ar_bytes, 0xf3); | 2902 | var.type = 0x3; |
| 2839 | if (save->base & 0xf) | 2903 | |
| 2840 | printk_once(KERN_WARNING "kvm: segment base is not paragraph" | 2904 | if (!emulate_invalid_guest_state) { |
| 2841 | " aligned when entering protected mode (seg=%d)", | 2905 | var.selector = var.base >> 4; |
| 2842 | seg); | 2906 | var.base = var.base & 0xffff0; |
| 2907 | var.limit = 0xffff; | ||
| 2908 | var.g = 0; | ||
| 2909 | var.db = 0; | ||
| 2910 | var.present = 1; | ||
| 2911 | var.s = 1; | ||
| 2912 | var.l = 0; | ||
| 2913 | var.unusable = 0; | ||
| 2914 | var.type = 0x3; | ||
| 2915 | var.avl = 0; | ||
| 2916 | if (save->base & 0xf) | ||
| 2917 | printk_once(KERN_WARNING "kvm: segment base is not " | ||
| 2918 | "paragraph aligned when entering " | ||
| 2919 | "protected mode (seg=%d)", seg); | ||
| 2920 | } | ||
| 2921 | |||
| 2922 | vmcs_write16(sf->selector, var.selector); | ||
| 2923 | vmcs_write32(sf->base, var.base); | ||
| 2924 | vmcs_write32(sf->limit, var.limit); | ||
| 2925 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); | ||
| 2843 | } | 2926 | } |
| 2844 | 2927 | ||
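A small standalone model of the vm86 fixup that fix_rmode_seg() applies when invalid-guest-state emulation is off: the segment is rewritten so that selector * 16 == base, with a 64KiB limit and an expand-up read/write data descriptor at DPL 3 (the 0xf3 access-rights pattern). The struct below is a simplified stand-in for struct kvm_segment and vm86_fixup() is an illustrative name; it exists only to make the arithmetic concrete.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the fields fix_rmode_seg() touches. */
struct seg {
	uint64_t base;
	uint32_t limit;
	uint16_t selector;
	uint8_t type, dpl, s, present;
};

static void vm86_fixup(struct seg *v)
{
	v->selector = v->base >> 4; /* selector = paragraph number of the base */
	v->base &= 0xffff0;         /* keep only the paragraph-aligned part */
	v->limit = 0xffff;          /* real-mode segments span 64KiB */
	v->type = 0x3;              /* read/write data, accessed */
	v->dpl = 3;                 /* vm86 code runs at CPL 3 */
	v->s = 1;
	v->present = 1;
}

int main(void)
{
	struct seg es = { .base = 0x12340, .limit = 0xfffff };

	vm86_fixup(&es);
	/* Prints: selector=1234 base=12340 limit=ffff */
	printf("selector=%x base=%llx limit=%x\n", (unsigned)es.selector,
	       (unsigned long long)es.base, (unsigned)es.limit);
	return 0;
}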
| 2845 | static void enter_rmode(struct kvm_vcpu *vcpu) | 2928 | static void enter_rmode(struct kvm_vcpu *vcpu) |
| 2846 | { | 2929 | { |
| 2847 | unsigned long flags; | 2930 | unsigned long flags; |
| 2848 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2931 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2849 | struct kvm_segment var; | ||
| 2850 | |||
| 2851 | if (enable_unrestricted_guest) | ||
| 2852 | return; | ||
| 2853 | 2932 | ||
| 2854 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); | 2933 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); |
| 2855 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); | 2934 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); |
| 2856 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); | 2935 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); |
| 2857 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); | 2936 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); |
| 2858 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); | 2937 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); |
| 2938 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); | ||
| 2939 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); | ||
| 2859 | 2940 | ||
| 2860 | vmx->emulation_required = 1; | ||
| 2861 | vmx->rmode.vm86_active = 1; | 2941 | vmx->rmode.vm86_active = 1; |
| 2862 | 2942 | ||
| 2863 | |||
| 2864 | /* | 2943 | /* |
| 2865 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 2944 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
| 2866 | * vcpu. Call it here with phys address pointing 16M below 4G. | 2945 | * vcpu. Call it here with phys address pointing 16M below 4G. |
| @@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 2888 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); | 2967 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); |
| 2889 | update_exception_bitmap(vcpu); | 2968 | update_exception_bitmap(vcpu); |
| 2890 | 2969 | ||
| 2891 | if (emulate_invalid_guest_state) | 2970 | fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); |
| 2892 | goto continue_rmode; | 2971 | fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); |
| 2893 | 2972 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); | |
| 2894 | vmx_get_segment(vcpu, &var, VCPU_SREG_SS); | 2973 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); |
| 2895 | vmx_set_segment(vcpu, &var, VCPU_SREG_SS); | 2974 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); |
| 2896 | 2975 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); | |
| 2897 | vmx_get_segment(vcpu, &var, VCPU_SREG_CS); | ||
| 2898 | vmx_set_segment(vcpu, &var, VCPU_SREG_CS); | ||
| 2899 | |||
| 2900 | vmx_get_segment(vcpu, &var, VCPU_SREG_ES); | ||
| 2901 | vmx_set_segment(vcpu, &var, VCPU_SREG_ES); | ||
| 2902 | |||
| 2903 | vmx_get_segment(vcpu, &var, VCPU_SREG_DS); | ||
| 2904 | vmx_set_segment(vcpu, &var, VCPU_SREG_DS); | ||
| 2905 | 2976 | ||
| 2906 | vmx_get_segment(vcpu, &var, VCPU_SREG_GS); | ||
| 2907 | vmx_set_segment(vcpu, &var, VCPU_SREG_GS); | ||
| 2908 | |||
| 2909 | vmx_get_segment(vcpu, &var, VCPU_SREG_FS); | ||
| 2910 | vmx_set_segment(vcpu, &var, VCPU_SREG_FS); | ||
| 2911 | |||
| 2912 | continue_rmode: | ||
| 2913 | kvm_mmu_reset_context(vcpu); | 2977 | kvm_mmu_reset_context(vcpu); |
| 2914 | } | 2978 | } |
| 2915 | 2979 | ||
| @@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 3068 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3132 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3069 | unsigned long hw_cr0; | 3133 | unsigned long hw_cr0; |
| 3070 | 3134 | ||
| 3135 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); | ||
| 3071 | if (enable_unrestricted_guest) | 3136 | if (enable_unrestricted_guest) |
| 3072 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) | 3137 | hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; |
| 3073 | | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; | 3138 | else { |
| 3074 | else | 3139 | hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; |
| 3075 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | ||
| 3076 | 3140 | ||
| 3077 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) | 3141 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
| 3078 | enter_pmode(vcpu); | 3142 | enter_pmode(vcpu); |
| 3079 | 3143 | ||
| 3080 | if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) | 3144 | if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) |
| 3081 | enter_rmode(vcpu); | 3145 | enter_rmode(vcpu); |
| 3146 | } | ||
| 3082 | 3147 | ||
| 3083 | #ifdef CONFIG_X86_64 | 3148 | #ifdef CONFIG_X86_64 |
| 3084 | if (vcpu->arch.efer & EFER_LME) { | 3149 | if (vcpu->arch.efer & EFER_LME) { |
| @@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 3098 | vmcs_writel(CR0_READ_SHADOW, cr0); | 3163 | vmcs_writel(CR0_READ_SHADOW, cr0); |
| 3099 | vmcs_writel(GUEST_CR0, hw_cr0); | 3164 | vmcs_writel(GUEST_CR0, hw_cr0); |
| 3100 | vcpu->arch.cr0 = cr0; | 3165 | vcpu->arch.cr0 = cr0; |
| 3101 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3166 | |
| 3167 | /* depends on vcpu->arch.cr0 to be set to a new value */ | ||
| 3168 | vmx->emulation_required = emulation_required(vcpu); | ||
| 3102 | } | 3169 | } |
| 3103 | 3170 | ||
| 3104 | static u64 construct_eptp(unsigned long root_hpa) | 3171 | static u64 construct_eptp(unsigned long root_hpa) |
| @@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 3155 | if (!is_paging(vcpu)) { | 3222 | if (!is_paging(vcpu)) { |
| 3156 | hw_cr4 &= ~X86_CR4_PAE; | 3223 | hw_cr4 &= ~X86_CR4_PAE; |
| 3157 | hw_cr4 |= X86_CR4_PSE; | 3224 | hw_cr4 |= X86_CR4_PSE; |
| 3225 | /* | ||
| 3226 | * SMEP is disabled when the CPU is in non-paging mode in | ||
| 3227 | * hardware. However, KVM always uses paging mode to | ||
| 3228 | * emulate guest non-paging mode with TDP. | ||
| 3229 | * To emulate this behavior, SMEP needs to be manually | ||
| 3230 | * disabled when the guest switches to non-paging mode. | ||
| 3231 | */ | ||
| 3232 | hw_cr4 &= ~X86_CR4_SMEP; | ||
| 3158 | } else if (!(cr4 & X86_CR4_PAE)) { | 3233 | } else if (!(cr4 & X86_CR4_PAE)) { |
| 3159 | hw_cr4 &= ~X86_CR4_PAE; | 3234 | hw_cr4 &= ~X86_CR4_PAE; |
| 3160 | } | 3235 | } |
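As an illustration of the CR4 massaging in the hunk above, here is a tiny model of how a guest CR4 value would be adjusted when the guest is not paging: PSE is forced on, while PAE and (per the new comment) SMEP are forced off in the hardware CR4. The bit constants are the architectural CR4 bits; the function sketches only this branch, not the full vmx_set_cr4().

#include <stdio.h>

#define X86_CR4_PSE  (1UL << 4)
#define X86_CR4_PAE  (1UL << 5)
#define X86_CR4_SMEP (1UL << 20)

/* Non-paging branch only: force PSE on, PAE and SMEP off in hardware CR4. */
static unsigned long hw_cr4_nonpaging(unsigned long guest_cr4)
{
	unsigned long hw_cr4 = guest_cr4;

	hw_cr4 &= ~X86_CR4_PAE;
	hw_cr4 |= X86_CR4_PSE;
	hw_cr4 &= ~X86_CR4_SMEP; /* SMEP cannot be honoured without paging */
	return hw_cr4;
}

int main(void)
{
	unsigned long guest_cr4 = X86_CR4_PAE | X86_CR4_SMEP;

	printf("guest %#lx -> hw %#lx\n", guest_cr4, hw_cr4_nonpaging(guest_cr4));
	return 0;
}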
| @@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
| 3171 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3246 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3172 | u32 ar; | 3247 | u32 ar; |
| 3173 | 3248 | ||
| 3174 | if (vmx->rmode.vm86_active | 3249 | if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { |
| 3175 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES | ||
| 3176 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS | ||
| 3177 | || seg == VCPU_SREG_GS)) { | ||
| 3178 | *var = vmx->rmode.segs[seg]; | 3250 | *var = vmx->rmode.segs[seg]; |
| 3179 | if (seg == VCPU_SREG_TR | 3251 | if (seg == VCPU_SREG_TR |
| 3180 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) | 3252 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) |
| @@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
| 3187 | var->limit = vmx_read_guest_seg_limit(vmx, seg); | 3259 | var->limit = vmx_read_guest_seg_limit(vmx, seg); |
| 3188 | var->selector = vmx_read_guest_seg_selector(vmx, seg); | 3260 | var->selector = vmx_read_guest_seg_selector(vmx, seg); |
| 3189 | ar = vmx_read_guest_seg_ar(vmx, seg); | 3261 | ar = vmx_read_guest_seg_ar(vmx, seg); |
| 3190 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | ||
| 3191 | ar = 0; | ||
| 3192 | var->type = ar & 15; | 3262 | var->type = ar & 15; |
| 3193 | var->s = (ar >> 4) & 1; | 3263 | var->s = (ar >> 4) & 1; |
| 3194 | var->dpl = (ar >> 5) & 3; | 3264 | var->dpl = (ar >> 5) & 3; |
| @@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
| 3211 | return vmx_read_guest_seg_base(to_vmx(vcpu), seg); | 3281 | return vmx_read_guest_seg_base(to_vmx(vcpu), seg); |
| 3212 | } | 3282 | } |
| 3213 | 3283 | ||
| 3214 | static int __vmx_get_cpl(struct kvm_vcpu *vcpu) | 3284 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
| 3215 | { | 3285 | { |
| 3286 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 3287 | |||
| 3216 | if (!is_protmode(vcpu)) | 3288 | if (!is_protmode(vcpu)) |
| 3217 | return 0; | 3289 | return 0; |
| 3218 | 3290 | ||
| @@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) | |||
| 3220 | && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ | 3292 | && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ |
| 3221 | return 3; | 3293 | return 3; |
| 3222 | 3294 | ||
| 3223 | return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; | ||
| 3224 | } | ||
| 3225 | |||
| 3226 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | ||
| 3227 | { | ||
| 3228 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 3229 | |||
| 3230 | /* | ||
| 3231 | * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations | ||
| 3232 | * fail; use the cache instead. | ||
| 3233 | */ | ||
| 3234 | if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) { | ||
| 3235 | return vmx->cpl; | ||
| 3236 | } | ||
| 3237 | |||
| 3238 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { | 3295 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { |
| 3239 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3296 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
| 3240 | vmx->cpl = __vmx_get_cpl(vcpu); | 3297 | vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3; |
| 3241 | } | 3298 | } |
| 3242 | 3299 | ||
| 3243 | return vmx->cpl; | 3300 | return vmx->cpl; |
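To make the simplified CPL rule concrete, a small model of the derivation the rewritten vmx_get_cpl() now uses: real mode reports CPL 0, virtual-8086 mode reports CPL 3, and otherwise the CPL is the RPL of the cached CS selector. The cpl_cache struct below is just a local stand-in for the VCPU_EXREG_CPL register-availability bit, not a kernel type.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cpl_cache {
	bool valid; /* models the VCPU_EXREG_CPL avail bit */
	int cpl;
};

static int get_cpl(bool protmode, bool vm86, uint16_t cs_selector,
		   struct cpl_cache *cache)
{
	if (!protmode)
		return 0;  /* real mode is treated as CPL 0 */
	if (vm86)
		return 3;  /* virtual-8086 code always runs at CPL 3 */
	if (!cache->valid) {
		cache->valid = true;
		cache->cpl = cs_selector & 3; /* CPL == RPL of CS */
	}
	return cache->cpl;
}

int main(void)
{
	struct cpl_cache c = { 0 };

	printf("%d\n", get_cpl(true, false, 0x0073, &c)); /* prints 3 */
	return 0;
}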
| @@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 3269 | { | 3326 | { |
| 3270 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3327 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3271 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3328 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
| 3272 | u32 ar; | ||
| 3273 | 3329 | ||
| 3274 | vmx_segment_cache_clear(vmx); | 3330 | vmx_segment_cache_clear(vmx); |
| 3331 | if (seg == VCPU_SREG_CS) | ||
| 3332 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
| 3275 | 3333 | ||
| 3276 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 3334 | if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { |
| 3277 | vmcs_write16(sf->selector, var->selector); | 3335 | vmx->rmode.segs[seg] = *var; |
| 3278 | vmx->rmode.segs[VCPU_SREG_TR] = *var; | 3336 | if (seg == VCPU_SREG_TR) |
| 3279 | return; | 3337 | vmcs_write16(sf->selector, var->selector); |
| 3338 | else if (var->s) | ||
| 3339 | fix_rmode_seg(seg, &vmx->rmode.segs[seg]); | ||
| 3340 | goto out; | ||
| 3280 | } | 3341 | } |
| 3342 | |||
| 3281 | vmcs_writel(sf->base, var->base); | 3343 | vmcs_writel(sf->base, var->base); |
| 3282 | vmcs_write32(sf->limit, var->limit); | 3344 | vmcs_write32(sf->limit, var->limit); |
| 3283 | vmcs_write16(sf->selector, var->selector); | 3345 | vmcs_write16(sf->selector, var->selector); |
| 3284 | if (vmx->rmode.vm86_active && var->s) { | ||
| 3285 | vmx->rmode.segs[seg] = *var; | ||
| 3286 | /* | ||
| 3287 | * Hack real-mode segments into vm86 compatibility. | ||
| 3288 | */ | ||
| 3289 | if (var->base == 0xffff0000 && var->selector == 0xf000) | ||
| 3290 | vmcs_writel(sf->base, 0xf0000); | ||
| 3291 | ar = 0xf3; | ||
| 3292 | } else | ||
| 3293 | ar = vmx_segment_access_rights(var); | ||
| 3294 | 3346 | ||
| 3295 | /* | 3347 | /* |
| 3296 | * Fix the "Accessed" bit in AR field of segment registers for older | 3348 | * Fix the "Accessed" bit in AR field of segment registers for older |
| @@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 3304 | * kvm hack. | 3356 | * kvm hack. |
| 3305 | */ | 3357 | */ |
| 3306 | if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) | 3358 | if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) |
| 3307 | ar |= 0x1; /* Accessed */ | 3359 | var->type |= 0x1; /* Accessed */ |
| 3308 | 3360 | ||
| 3309 | vmcs_write32(sf->ar_bytes, ar); | 3361 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); |
| 3310 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
| 3311 | 3362 | ||
| 3312 | /* | 3363 | out: |
| 3313 | * Fix segments for real mode guest in hosts that don't have | 3364 | vmx->emulation_required |= emulation_required(vcpu); |
| 3314 | * "unrestricted_mode" or it was disabled. | ||
| 3315 | * This is done to allow migration of the guests from hosts with | ||
| 3316 | * unrestricted guest like Westmere to older host that don't have | ||
| 3317 | * unrestricted guest like Nehelem. | ||
| 3318 | */ | ||
| 3319 | if (vmx->rmode.vm86_active) { | ||
| 3320 | switch (seg) { | ||
| 3321 | case VCPU_SREG_CS: | ||
| 3322 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | ||
| 3323 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | ||
| 3324 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
| 3325 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
| 3326 | vmcs_write16(GUEST_CS_SELECTOR, | ||
| 3327 | vmcs_readl(GUEST_CS_BASE) >> 4); | ||
| 3328 | break; | ||
| 3329 | case VCPU_SREG_ES: | ||
| 3330 | case VCPU_SREG_DS: | ||
| 3331 | case VCPU_SREG_GS: | ||
| 3332 | case VCPU_SREG_FS: | ||
| 3333 | fix_rmode_seg(seg, &vmx->rmode.segs[seg]); | ||
| 3334 | break; | ||
| 3335 | case VCPU_SREG_SS: | ||
| 3336 | vmcs_write16(GUEST_SS_SELECTOR, | ||
| 3337 | vmcs_readl(GUEST_SS_BASE) >> 4); | ||
| 3338 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | ||
| 3339 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | ||
| 3340 | break; | ||
| 3341 | } | ||
| 3342 | } | ||
| 3343 | } | 3365 | } |
| 3344 | 3366 | ||
| 3345 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 3367 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
| @@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | |||
| 3380 | u32 ar; | 3402 | u32 ar; |
| 3381 | 3403 | ||
| 3382 | vmx_get_segment(vcpu, &var, seg); | 3404 | vmx_get_segment(vcpu, &var, seg); |
| 3405 | var.dpl = 0x3; | ||
| 3406 | if (seg == VCPU_SREG_CS) | ||
| 3407 | var.type = 0x3; | ||
| 3383 | ar = vmx_segment_access_rights(&var); | 3408 | ar = vmx_segment_access_rights(&var); |
| 3384 | 3409 | ||
| 3385 | if (var.base != (var.selector << 4)) | 3410 | if (var.base != (var.selector << 4)) |
| 3386 | return false; | 3411 | return false; |
| 3387 | if (var.limit < 0xffff) | 3412 | if (var.limit != 0xffff) |
| 3388 | return false; | 3413 | return false; |
| 3389 | if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) | 3414 | if (ar != 0xf3) |
| 3390 | return false; | 3415 | return false; |
| 3391 | 3416 | ||
| 3392 | return true; | 3417 | return true; |
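A standalone sketch of the three checks rmode_segment_valid() is left with after this hunk: once DPL and (for CS) the type are normalised, a segment is vm86-compatible exactly when base == selector * 16, the limit is 0xffff, and the access rights collapse to 0xf3. The struct and access_rights() packing below are simplified stand-ins that mirror the VMX AR byte only as far as this check needs.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint32_t base, limit;
	uint16_t selector;
	uint8_t type, s, dpl, present;
};

/* Pack just the AR bits this check cares about: type, S, DPL, P. */
static uint32_t access_rights(const struct seg *v)
{
	return v->type | (v->s << 4) | (v->dpl << 5) | (v->present << 7);
}

static bool rmode_segment_ok(struct seg v, bool is_cs)
{
	v.dpl = 3;        /* normalised before the comparison */
	if (is_cs)
		v.type = 3;
	if (v.base != (uint32_t)v.selector << 4)
		return false;
	if (v.limit != 0xffff)
		return false;
	return access_rights(&v) == 0xf3;
}

int main(void)
{
	struct seg ds = { .base = 0xb8000, .limit = 0xffff, .selector = 0xb800,
			  .type = 3, .s = 1, .dpl = 3, .present = 1 };

	printf("%s\n", rmode_segment_ok(ds, false) ? "valid" : "invalid");
	return 0;
}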
| @@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | |||
| 3521 | */ | 3546 | */ |
| 3522 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | 3547 | static bool guest_state_valid(struct kvm_vcpu *vcpu) |
| 3523 | { | 3548 | { |
| 3549 | if (enable_unrestricted_guest) | ||
| 3550 | return true; | ||
| 3551 | |||
| 3524 | /* real mode guest state checks */ | 3552 | /* real mode guest state checks */ |
| 3525 | if (!is_protmode(vcpu)) { | 3553 | if (!is_protmode(vcpu)) { |
| 3526 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 3554 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
| @@ -3644,12 +3672,9 @@ static void seg_setup(int seg) | |||
| 3644 | vmcs_write16(sf->selector, 0); | 3672 | vmcs_write16(sf->selector, 0); |
| 3645 | vmcs_writel(sf->base, 0); | 3673 | vmcs_writel(sf->base, 0); |
| 3646 | vmcs_write32(sf->limit, 0xffff); | 3674 | vmcs_write32(sf->limit, 0xffff); |
| 3647 | if (enable_unrestricted_guest) { | 3675 | ar = 0x93; |
| 3648 | ar = 0x93; | 3676 | if (seg == VCPU_SREG_CS) |
| 3649 | if (seg == VCPU_SREG_CS) | 3677 | ar |= 0x08; /* code segment */ |
| 3650 | ar |= 0x08; /* code segment */ | ||
| 3651 | } else | ||
| 3652 | ar = 0xf3; | ||
| 3653 | 3678 | ||
| 3654 | vmcs_write32(sf->ar_bytes, ar); | 3679 | vmcs_write32(sf->ar_bytes, ar); |
| 3655 | } | 3680 | } |
| @@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
| 3667 | kvm_userspace_mem.flags = 0; | 3692 | kvm_userspace_mem.flags = 0; |
| 3668 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; | 3693 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; |
| 3669 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3694 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
| 3670 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | 3695 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); |
| 3671 | if (r) | 3696 | if (r) |
| 3672 | goto out; | 3697 | goto out; |
| 3673 | 3698 | ||
| @@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
| 3697 | kvm_userspace_mem.guest_phys_addr = | 3722 | kvm_userspace_mem.guest_phys_addr = |
| 3698 | kvm->arch.ept_identity_map_addr; | 3723 | kvm->arch.ept_identity_map_addr; |
| 3699 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3724 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
| 3700 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | 3725 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); |
| 3701 | if (r) | 3726 | if (r) |
| 3702 | goto out; | 3727 | goto out; |
| 3703 | 3728 | ||
| @@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx) | |||
| 3739 | spin_unlock(&vmx_vpid_lock); | 3764 | spin_unlock(&vmx_vpid_lock); |
| 3740 | } | 3765 | } |
| 3741 | 3766 | ||
| 3742 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 3767 | #define MSR_TYPE_R 1 |
| 3768 | #define MSR_TYPE_W 2 | ||
| 3769 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
| 3770 | u32 msr, int type) | ||
| 3743 | { | 3771 | { |
| 3744 | int f = sizeof(unsigned long); | 3772 | int f = sizeof(unsigned long); |
| 3745 | 3773 | ||
| @@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | |||
| 3752 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | 3780 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
| 3753 | */ | 3781 | */ |
| 3754 | if (msr <= 0x1fff) { | 3782 | if (msr <= 0x1fff) { |
| 3755 | __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ | 3783 | if (type & MSR_TYPE_R) |
| 3756 | __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ | 3784 | /* read-low */ |
| 3785 | __clear_bit(msr, msr_bitmap + 0x000 / f); | ||
| 3786 | |||
| 3787 | if (type & MSR_TYPE_W) | ||
| 3788 | /* write-low */ | ||
| 3789 | __clear_bit(msr, msr_bitmap + 0x800 / f); | ||
| 3790 | |||
| 3757 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | 3791 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
| 3758 | msr &= 0x1fff; | 3792 | msr &= 0x1fff; |
| 3759 | __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ | 3793 | if (type & MSR_TYPE_R) |
| 3760 | __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ | 3794 | /* read-high */ |
| 3795 | __clear_bit(msr, msr_bitmap + 0x400 / f); | ||
| 3796 | |||
| 3797 | if (type & MSR_TYPE_W) | ||
| 3798 | /* write-high */ | ||
| 3799 | __clear_bit(msr, msr_bitmap + 0xc00 / f); | ||
| 3800 | |||
| 3801 | } | ||
| 3802 | } | ||
| 3803 | |||
| 3804 | static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
| 3805 | u32 msr, int type) | ||
| 3806 | { | ||
| 3807 | int f = sizeof(unsigned long); | ||
| 3808 | |||
| 3809 | if (!cpu_has_vmx_msr_bitmap()) | ||
| 3810 | return; | ||
| 3811 | |||
| 3812 | /* | ||
| 3813 | * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals | ||
| 3814 | * have the write-low and read-high bitmap offsets the wrong way round. | ||
| 3815 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | ||
| 3816 | */ | ||
| 3817 | if (msr <= 0x1fff) { | ||
| 3818 | if (type & MSR_TYPE_R) | ||
| 3819 | /* read-low */ | ||
| 3820 | __set_bit(msr, msr_bitmap + 0x000 / f); | ||
| 3821 | |||
| 3822 | if (type & MSR_TYPE_W) | ||
| 3823 | /* write-low */ | ||
| 3824 | __set_bit(msr, msr_bitmap + 0x800 / f); | ||
| 3825 | |||
| 3826 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
| 3827 | msr &= 0x1fff; | ||
| 3828 | if (type & MSR_TYPE_R) | ||
| 3829 | /* read-high */ | ||
| 3830 | __set_bit(msr, msr_bitmap + 0x400 / f); | ||
| 3831 | |||
| 3832 | if (type & MSR_TYPE_W) | ||
| 3833 | /* write-high */ | ||
| 3834 | __set_bit(msr, msr_bitmap + 0xc00 / f); | ||
| 3835 | |||
| 3761 | } | 3836 | } |
| 3762 | } | 3837 | } |
| 3763 | 3838 | ||
| 3764 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | 3839 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
| 3765 | { | 3840 | { |
| 3766 | if (!longmode_only) | 3841 | if (!longmode_only) |
| 3767 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); | 3842 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, |
| 3768 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); | 3843 | msr, MSR_TYPE_R | MSR_TYPE_W); |
| 3844 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, | ||
| 3845 | msr, MSR_TYPE_R | MSR_TYPE_W); | ||
| 3846 | } | ||
| 3847 | |||
| 3848 | static void vmx_enable_intercept_msr_read_x2apic(u32 msr) | ||
| 3849 | { | ||
| 3850 | __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
| 3851 | msr, MSR_TYPE_R); | ||
| 3852 | __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
| 3853 | msr, MSR_TYPE_R); | ||
| 3854 | } | ||
| 3855 | |||
| 3856 | static void vmx_disable_intercept_msr_read_x2apic(u32 msr) | ||
| 3857 | { | ||
| 3858 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
| 3859 | msr, MSR_TYPE_R); | ||
| 3860 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
| 3861 | msr, MSR_TYPE_R); | ||
| 3862 | } | ||
| 3863 | |||
| 3864 | static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | ||
| 3865 | { | ||
| 3866 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
| 3867 | msr, MSR_TYPE_W); | ||
| 3868 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
| 3869 | msr, MSR_TYPE_W); | ||
| 3769 | } | 3870 | } |
| 3770 | 3871 | ||
| 3771 | /* | 3872 | /* |
| @@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
| 3844 | return exec_control; | 3945 | return exec_control; |
| 3845 | } | 3946 | } |
| 3846 | 3947 | ||
| 3948 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
| 3949 | { | ||
| 3950 | return enable_apicv_reg_vid && irqchip_in_kernel(kvm); | ||
| 3951 | } | ||
| 3952 | |||
| 3847 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 3953 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
| 3848 | { | 3954 | { |
| 3849 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 3955 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
| @@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 3861 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 3967 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 3862 | if (!ple_gap) | 3968 | if (!ple_gap) |
| 3863 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 3969 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
| 3970 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
| 3971 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
| 3972 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
| 3973 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
| 3864 | return exec_control; | 3974 | return exec_control; |
| 3865 | } | 3975 | } |
| 3866 | 3976 | ||
| @@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 3905 | vmx_secondary_exec_control(vmx)); | 4015 | vmx_secondary_exec_control(vmx)); |
| 3906 | } | 4016 | } |
| 3907 | 4017 | ||
| 4018 | if (enable_apicv_reg_vid) { | ||
| 4019 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | ||
| 4020 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | ||
| 4021 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | ||
| 4022 | vmcs_write64(EOI_EXIT_BITMAP3, 0); | ||
| 4023 | |||
| 4024 | vmcs_write16(GUEST_INTR_STATUS, 0); | ||
| 4025 | } | ||
| 4026 | |||
| 3908 | if (ple_gap) { | 4027 | if (ple_gap) { |
| 3909 | vmcs_write32(PLE_GAP, ple_gap); | 4028 | vmcs_write32(PLE_GAP, ple_gap); |
| 3910 | vmcs_write32(PLE_WINDOW, ple_window); | 4029 | vmcs_write32(PLE_WINDOW, ple_window); |
| @@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 3990 | vmx_segment_cache_clear(vmx); | 4109 | vmx_segment_cache_clear(vmx); |
| 3991 | 4110 | ||
| 3992 | seg_setup(VCPU_SREG_CS); | 4111 | seg_setup(VCPU_SREG_CS); |
| 3993 | /* | 4112 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
| 3994 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | ||
| 3995 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. | ||
| 3996 | */ | ||
| 3997 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) { | ||
| 3998 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 4113 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
| 3999 | vmcs_writel(GUEST_CS_BASE, 0x000f0000); | 4114 | else { |
| 4000 | } else { | ||
| 4001 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | 4115 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); |
| 4002 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | 4116 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); |
| 4003 | } | 4117 | } |
| @@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 4073 | 4187 | ||
| 4074 | ret = 0; | 4188 | ret = 0; |
| 4075 | 4189 | ||
| 4076 | /* HACK: Don't enable emulation on guest boot/reset */ | ||
| 4077 | vmx->emulation_required = 0; | ||
| 4078 | |||
| 4079 | return ret; | 4190 | return ret; |
| 4080 | } | 4191 | } |
| 4081 | 4192 | ||
| @@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
| 4251 | .flags = 0, | 4362 | .flags = 0, |
| 4252 | }; | 4363 | }; |
| 4253 | 4364 | ||
| 4254 | ret = kvm_set_memory_region(kvm, &tss_mem, 0); | 4365 | ret = kvm_set_memory_region(kvm, &tss_mem, false); |
| 4255 | if (ret) | 4366 | if (ret) |
| 4256 | return ret; | 4367 | return ret; |
| 4257 | kvm->arch.tss_addr = addr; | 4368 | kvm->arch.tss_addr = addr; |
| @@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
| 4261 | return 0; | 4372 | return 0; |
| 4262 | } | 4373 | } |
| 4263 | 4374 | ||
| 4264 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, | 4375 | static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) |
| 4265 | int vec, u32 err_code) | ||
| 4266 | { | 4376 | { |
| 4267 | /* | ||
| 4268 | * Instruction with address size override prefix opcode 0x67 | ||
| 4269 | * Cause the #SS fault with 0 error code in VM86 mode. | ||
| 4270 | */ | ||
| 4271 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | ||
| 4272 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) | ||
| 4273 | return 1; | ||
| 4274 | /* | ||
| 4275 | * Forward all other exceptions that are valid in real mode. | ||
| 4276 | * FIXME: Breaks guest debugging in real mode, needs to be fixed with | ||
| 4277 | * the required debugging infrastructure rework. | ||
| 4278 | */ | ||
| 4279 | switch (vec) { | 4377 | switch (vec) { |
| 4280 | case DB_VECTOR: | ||
| 4281 | if (vcpu->guest_debug & | ||
| 4282 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
| 4283 | return 0; | ||
| 4284 | kvm_queue_exception(vcpu, vec); | ||
| 4285 | return 1; | ||
| 4286 | case BP_VECTOR: | 4378 | case BP_VECTOR: |
| 4287 | /* | 4379 | /* |
| 4288 | * Update instruction length as we may reinject the exception | 4380 | * Update instruction length as we may reinject the exception |
| @@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
| 4291 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = | 4383 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = |
| 4292 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 4384 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 4293 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 4385 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
| 4294 | return 0; | 4386 | return false; |
| 4387 | /* fall through */ | ||
| 4388 | case DB_VECTOR: | ||
| 4389 | if (vcpu->guest_debug & | ||
| 4390 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
| 4391 | return false; | ||
| 4295 | /* fall through */ | 4392 | /* fall through */ |
| 4296 | case DE_VECTOR: | 4393 | case DE_VECTOR: |
| 4297 | case OF_VECTOR: | 4394 | case OF_VECTOR: |
| @@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
| 4301 | case SS_VECTOR: | 4398 | case SS_VECTOR: |
| 4302 | case GP_VECTOR: | 4399 | case GP_VECTOR: |
| 4303 | case MF_VECTOR: | 4400 | case MF_VECTOR: |
| 4304 | kvm_queue_exception(vcpu, vec); | 4401 | return true; |
| 4305 | return 1; | 4402 | break; |
| 4306 | } | 4403 | } |
| 4307 | return 0; | 4404 | return false; |
| 4405 | } | ||
| 4406 | |||
| 4407 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, | ||
| 4408 | int vec, u32 err_code) | ||
| 4409 | { | ||
| 4410 | /* | ||
| 4411 | * An instruction with the address-size override prefix (opcode 0x67) | ||
| 4412 | * causes an #SS fault with error code 0 in VM86 mode. | ||
| 4413 | */ | ||
| 4414 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { | ||
| 4415 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { | ||
| 4416 | if (vcpu->arch.halt_request) { | ||
| 4417 | vcpu->arch.halt_request = 0; | ||
| 4418 | return kvm_emulate_halt(vcpu); | ||
| 4419 | } | ||
| 4420 | return 1; | ||
| 4421 | } | ||
| 4422 | return 0; | ||
| 4423 | } | ||
| 4424 | |||
| 4425 | /* | ||
| 4426 | * Forward all other exceptions that are valid in real mode. | ||
| 4427 | * FIXME: Breaks guest debugging in real mode, needs to be fixed with | ||
| 4428 | * the required debugging infrastructure rework. | ||
| 4429 | */ | ||
| 4430 | kvm_queue_exception(vcpu, vec); | ||
| 4431 | return 1; | ||
| 4308 | } | 4432 | } |
| 4309 | 4433 | ||
| 4310 | /* | 4434 | /* |
| @@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
| 4392 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); | 4516 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); |
| 4393 | } | 4517 | } |
| 4394 | 4518 | ||
| 4395 | if (vmx->rmode.vm86_active && | ||
| 4396 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, | ||
| 4397 | error_code)) { | ||
| 4398 | if (vcpu->arch.halt_request) { | ||
| 4399 | vcpu->arch.halt_request = 0; | ||
| 4400 | return kvm_emulate_halt(vcpu); | ||
| 4401 | } | ||
| 4402 | return 1; | ||
| 4403 | } | ||
| 4404 | |||
| 4405 | ex_no = intr_info & INTR_INFO_VECTOR_MASK; | 4519 | ex_no = intr_info & INTR_INFO_VECTOR_MASK; |
| 4520 | |||
| 4521 | if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) | ||
| 4522 | return handle_rmode_exception(vcpu, ex_no, error_code); | ||
| 4523 | |||
| 4406 | switch (ex_no) { | 4524 | switch (ex_no) { |
| 4407 | case DB_VECTOR: | 4525 | case DB_VECTOR: |
| 4408 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 4526 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
| @@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) | |||
| 4820 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 4938 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
| 4821 | } | 4939 | } |
| 4822 | 4940 | ||
| 4941 | static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) | ||
| 4942 | { | ||
| 4943 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
| 4944 | int vector = exit_qualification & 0xff; | ||
| 4945 | |||
| 4946 | /* EOI-induced VM exit is trap-like, so there is no need to adjust IP */ | ||
| 4947 | kvm_apic_set_eoi_accelerated(vcpu, vector); | ||
| 4948 | return 1; | ||
| 4949 | } | ||
| 4950 | |||
| 4951 | static int handle_apic_write(struct kvm_vcpu *vcpu) | ||
| 4952 | { | ||
| 4953 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
| 4954 | u32 offset = exit_qualification & 0xfff; | ||
| 4955 | |||
| 4956 | /* APIC-write VM exit is trap-like, so there is no need to adjust IP */ | ||
| 4957 | kvm_apic_write_nodecode(vcpu, offset); | ||
| 4958 | return 1; | ||
| 4959 | } | ||
| 4960 | |||
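A brief sketch of how the exit qualification is decoded by the two new trap-like handlers above: the EOI-induced exit carries the EOI'd vector in the low byte, and the APIC-write exit carries the written APIC register offset in the low 12 bits. The masking is taken from the handlers; the function names here are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* EOI-induced exit: bits 7:0 of the qualification are the EOI'd vector. */
static int eoi_vector(uint64_t exit_qualification)
{
	return exit_qualification & 0xff;
}

/* APIC-write exit: bits 11:0 are the offset of the written APIC register. */
static uint32_t apic_write_offset(uint64_t exit_qualification)
{
	return exit_qualification & 0xfff;
}

int main(void)
{
	/* Prints: vector 49, offset 0x380 */
	printf("vector %d, offset %#x\n",
	       eoi_vector(0x31), (unsigned)apic_write_offset(0x380));
	return 0;
}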
| 4823 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 4961 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
| 4824 | { | 4962 | { |
| 4825 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4963 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
| 5065 | schedule(); | 5203 | schedule(); |
| 5066 | } | 5204 | } |
| 5067 | 5205 | ||
| 5068 | vmx->emulation_required = !guest_state_valid(vcpu); | 5206 | vmx->emulation_required = emulation_required(vcpu); |
| 5069 | out: | 5207 | out: |
| 5070 | return ret; | 5208 | return ret; |
| 5071 | } | 5209 | } |
| @@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
| 5754 | [EXIT_REASON_VMON] = handle_vmon, | 5892 | [EXIT_REASON_VMON] = handle_vmon, |
| 5755 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 5893 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
| 5756 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 5894 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
| 5895 | [EXIT_REASON_APIC_WRITE] = handle_apic_write, | ||
| 5896 | [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, | ||
| 5757 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 5897 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
| 5758 | [EXIT_REASON_XSETBV] = handle_xsetbv, | 5898 | [EXIT_REASON_XSETBV] = handle_xsetbv, |
| 5759 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 5899 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
| @@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
| 5780 | u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; | 5920 | u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; |
| 5781 | gpa_t bitmap; | 5921 | gpa_t bitmap; |
| 5782 | 5922 | ||
| 5783 | if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) | 5923 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) |
| 5784 | return 1; | 5924 | return 1; |
| 5785 | 5925 | ||
| 5786 | /* | 5926 | /* |
| @@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 6008 | u32 vectoring_info = vmx->idt_vectoring_info; | 6148 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 6009 | 6149 | ||
| 6010 | /* If guest state is invalid, start emulating */ | 6150 | /* If guest state is invalid, start emulating */ |
| 6011 | if (vmx->emulation_required && emulate_invalid_guest_state) | 6151 | if (vmx->emulation_required) |
| 6012 | return handle_invalid_guest_state(vcpu); | 6152 | return handle_invalid_guest_state(vcpu); |
| 6013 | 6153 | ||
| 6014 | /* | 6154 | /* |
| @@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
| 6103 | vmcs_write32(TPR_THRESHOLD, irr); | 6243 | vmcs_write32(TPR_THRESHOLD, irr); |
| 6104 | } | 6244 | } |
| 6105 | 6245 | ||
| 6246 | static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
| 6247 | { | ||
| 6248 | u32 sec_exec_control; | ||
| 6249 | |||
| 6250 | /* | ||
| 6251 | * There is no point in enabling virtualized x2apic mode without | ||
| 6252 | * also enabling apicv. | ||
| 6253 | */ | ||
| 6254 | if (!cpu_has_vmx_virtualize_x2apic_mode() || | ||
| 6255 | !vmx_vm_has_apicv(vcpu->kvm)) | ||
| 6256 | return; | ||
| 6257 | |||
| 6258 | if (!vm_need_tpr_shadow(vcpu->kvm)) | ||
| 6259 | return; | ||
| 6260 | |||
| 6261 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
| 6262 | |||
| 6263 | if (set) { | ||
| 6264 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
| 6265 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
| 6266 | } else { | ||
| 6267 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
| 6268 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
| 6269 | } | ||
| 6270 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | ||
| 6271 | |||
| 6272 | vmx_set_msr_bitmap(vcpu); | ||
| 6273 | } | ||
| 6274 | |||
| 6275 | static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) | ||
| 6276 | { | ||
| 6277 | u16 status; | ||
| 6278 | u8 old; | ||
| 6279 | |||
| 6280 | if (!vmx_vm_has_apicv(kvm)) | ||
| 6281 | return; | ||
| 6282 | |||
| 6283 | if (isr == -1) | ||
| 6284 | isr = 0; | ||
| 6285 | |||
| 6286 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
| 6287 | old = status >> 8; | ||
| 6288 | if (isr != old) { | ||
| 6289 | status &= 0xff; | ||
| 6290 | status |= isr << 8; | ||
| 6291 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
| 6292 | } | ||
| 6293 | } | ||
| 6294 | |||
| 6295 | static void vmx_set_rvi(int vector) | ||
| 6296 | { | ||
| 6297 | u16 status; | ||
| 6298 | u8 old; | ||
| 6299 | |||
| 6300 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
| 6301 | old = (u8)status & 0xff; | ||
| 6302 | if ((u8)vector != old) { | ||
| 6303 | status &= ~0xff; | ||
| 6304 | status |= (u8)vector; | ||
| 6305 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
| 6306 | } | ||
| 6307 | } | ||
| 6308 | |||
| 6309 | static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | ||
| 6310 | { | ||
| 6311 | if (max_irr == -1) | ||
| 6312 | return; | ||
| 6313 | |||
| 6314 | vmx_set_rvi(max_irr); | ||
| 6315 | } | ||
| 6316 | |||
| 6317 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
| 6318 | { | ||
| 6319 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | ||
| 6320 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); | ||
| 6321 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); | ||
| 6322 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); | ||
| 6323 | } | ||
| 6324 | |||
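To summarise what vmx_set_rvi() and vmx_hwapic_isr_update() above are editing, here is a compact model of the 16-bit guest interrupt status field: RVI (highest requesting vector) lives in the low byte and SVI (highest in-service vector) in the high byte. The helpers below only demonstrate the packing on a plain integer; they do not touch a real VMCS, and their names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* GUEST_INTR_STATUS: RVI in bits 7:0, SVI in bits 15:8. */
static uint16_t set_rvi(uint16_t status, uint8_t vector)
{
	return (status & ~0xff) | vector;
}

static uint16_t set_svi(uint16_t status, int isr)
{
	uint8_t svi = (isr == -1) ? 0 : (uint8_t)isr; /* -1 means "none in service" */

	return (status & 0xff) | ((uint16_t)svi << 8);
}

int main(void)
{
	uint16_t status = 0;

	status = set_rvi(status, 0x31); /* highest requested vector */
	status = set_svi(status, 0x30); /* highest in-service vector */
	printf("GUEST_INTR_STATUS = %#x\n", (unsigned)status); /* prints 0x3031 */
	return 0;
}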
| 6106 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 6325 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
| 6107 | { | 6326 | { |
| 6108 | u32 exit_intr_info; | 6327 | u32 exit_intr_info; |
| @@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6291 | 6510 | ||
| 6292 | /* Don't enter VMX if guest state is invalid, let the exit handler | 6511 | /* Don't enter VMX if guest state is invalid, let the exit handler |
| 6293 | start emulation until we arrive back to a valid state */ | 6512 | start emulation until we arrive back to a valid state */ |
| 6294 | if (vmx->emulation_required && emulate_invalid_guest_state) | 6513 | if (vmx->emulation_required) |
| 6295 | return; | 6514 | return; |
| 6296 | 6515 | ||
| 6297 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6516 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
| @@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 7366 | .enable_nmi_window = enable_nmi_window, | 7585 | .enable_nmi_window = enable_nmi_window, |
| 7367 | .enable_irq_window = enable_irq_window, | 7586 | .enable_irq_window = enable_irq_window, |
| 7368 | .update_cr8_intercept = update_cr8_intercept, | 7587 | .update_cr8_intercept = update_cr8_intercept, |
| 7588 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, | ||
| 7589 | .vm_has_apicv = vmx_vm_has_apicv, | ||
| 7590 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | ||
| 7591 | .hwapic_irr_update = vmx_hwapic_irr_update, | ||
| 7592 | .hwapic_isr_update = vmx_hwapic_isr_update, | ||
| 7369 | 7593 | ||
| 7370 | .set_tss_addr = vmx_set_tss_addr, | 7594 | .set_tss_addr = vmx_set_tss_addr, |
| 7371 | .get_tdp_level = get_ept_level, | 7595 | .get_tdp_level = get_ept_level, |
| @@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 7398 | 7622 | ||
| 7399 | static int __init vmx_init(void) | 7623 | static int __init vmx_init(void) |
| 7400 | { | 7624 | { |
| 7401 | int r, i; | 7625 | int r, i, msr; |
| 7402 | 7626 | ||
| 7403 | rdmsrl_safe(MSR_EFER, &host_efer); | 7627 | rdmsrl_safe(MSR_EFER, &host_efer); |
| 7404 | 7628 | ||
| @@ -7419,11 +7643,19 @@ static int __init vmx_init(void) | |||
| 7419 | if (!vmx_msr_bitmap_legacy) | 7643 | if (!vmx_msr_bitmap_legacy) |
| 7420 | goto out1; | 7644 | goto out1; |
| 7421 | 7645 | ||
| 7646 | vmx_msr_bitmap_legacy_x2apic = | ||
| 7647 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
| 7648 | if (!vmx_msr_bitmap_legacy_x2apic) | ||
| 7649 | goto out2; | ||
| 7422 | 7650 | ||
| 7423 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | 7651 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); |
| 7424 | if (!vmx_msr_bitmap_longmode) | 7652 | if (!vmx_msr_bitmap_longmode) |
| 7425 | goto out2; | 7653 | goto out3; |
| 7426 | 7654 | ||
| 7655 | vmx_msr_bitmap_longmode_x2apic = | ||
| 7656 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
| 7657 | if (!vmx_msr_bitmap_longmode_x2apic) | ||
| 7658 | goto out4; | ||
| 7427 | 7659 | ||
| 7428 | /* | 7660 | /* |
| 7429 | * Allow direct access to the PC debug port (it is often used for I/O | 7661 | * Allow direct access to the PC debug port (it is often used for I/O |
| @@ -7455,6 +7687,28 @@ static int __init vmx_init(void) | |||
| 7455 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | 7687 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
| 7456 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | 7688 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
| 7457 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 7689 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
| 7690 | memcpy(vmx_msr_bitmap_legacy_x2apic, | ||
| 7691 | vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
| 7692 | memcpy(vmx_msr_bitmap_longmode_x2apic, | ||
| 7693 | vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
| 7694 | |||
| 7695 | if (enable_apicv_reg_vid) { | ||
| 7696 | for (msr = 0x800; msr <= 0x8ff; msr++) | ||
| 7697 | vmx_disable_intercept_msr_read_x2apic(msr); | ||
| 7698 | |||
| 7699 | /* According to the SDM, in x2apic mode the whole id reg is used. | ||
| 7700 | * But KVM only uses the highest eight bits, so reads still need | ||
| 7701 | * to be intercepted. */ | ||
| 7702 | vmx_enable_intercept_msr_read_x2apic(0x802); | ||
| 7703 | /* TMCCT */ | ||
| 7704 | vmx_enable_intercept_msr_read_x2apic(0x839); | ||
| 7705 | /* TPR */ | ||
| 7706 | vmx_disable_intercept_msr_write_x2apic(0x808); | ||
| 7707 | /* EOI */ | ||
| 7708 | vmx_disable_intercept_msr_write_x2apic(0x80b); | ||
| 7709 | /* SELF-IPI */ | ||
| 7710 | vmx_disable_intercept_msr_write_x2apic(0x83f); | ||
| 7711 | } | ||
| 7458 | 7712 | ||
| 7459 | if (enable_ept) { | 7713 | if (enable_ept) { |
| 7460 | kvm_mmu_set_mask_ptes(0ull, | 7714 | kvm_mmu_set_mask_ptes(0ull, |
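The 0x800-0x8ff loop above relies on the VMX MSR-bitmap layout described in the Intel SDM: a 4 KiB page split into four 1 KiB regions (read-low, read-high, write-low, write-high), one bit per MSR, where a clear bit means no VM exit. A self-contained sketch of what such disable-intercept helpers could look like; the names and constants here are illustrative, not the kernel's:

#include <stdint.h>

#define MSR_BITMAP_READ_LOW	0x000	/* reads of MSRs 0x00000000 - 0x00001fff */
#define MSR_BITMAP_WRITE_LOW	0x800	/* writes of the same MSR range */

static void msr_bitmap_clear_bit(uint8_t *bitmap, unsigned int region,
				 uint32_t msr)
{
	bitmap[region + msr / 8] &= (uint8_t)~(1u << (msr % 8));
}

static void disable_x2apic_read_intercepts(uint8_t bitmap[4096])
{
	uint32_t msr;

	/* let the guest read the whole x2APIC MSR range without exiting */
	for (msr = 0x800; msr <= 0x8ff; msr++)
		msr_bitmap_clear_bit(bitmap, MSR_BITMAP_READ_LOW, msr);
}

static void disable_x2apic_write_intercept(uint8_t bitmap[4096], uint32_t msr)
{
	/* e.g. TPR (0x808), EOI (0x80b), SELF-IPI (0x83f) as in the hunk above */
	msr_bitmap_clear_bit(bitmap, MSR_BITMAP_WRITE_LOW, msr);
}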
| @@ -7468,8 +7722,10 @@ static int __init vmx_init(void) | |||
| 7468 | 7722 | ||
| 7469 | return 0; | 7723 | return 0; |
| 7470 | 7724 | ||
| 7471 | out3: | 7725 | out4: |
| 7472 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 7726 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
| 7727 | out3: | ||
| 7728 | free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
| 7473 | out2: | 7729 | out2: |
| 7474 | free_page((unsigned long)vmx_msr_bitmap_legacy); | 7730 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
| 7475 | out1: | 7731 | out1: |
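The relabelled out2/out3/out4 targets keep the usual kernel unwind idiom intact: each successful allocation adds one more label to the failure path, so a later failure frees exactly what already succeeded, in reverse order. A standalone sketch of the same pattern with hypothetical buffers (not the VMX bitmaps):

#include <stdio.h>
#include <stdlib.h>

static int init_buffers(void)
{
	void *a, *b, *c;

	a = malloc(4096);
	if (!a)
		goto out;
	b = malloc(4096);
	if (!b)
		goto out_free_a;	/* undo only what already succeeded */
	c = malloc(4096);
	if (!c)
		goto out_free_b;

	printf("all allocations succeeded\n");
	free(c);
	free(b);
	free(a);
	return 0;

out_free_b:
	free(b);
out_free_a:
	free(a);
out:
	return -1;
}

int main(void)
{
	return init_buffers() ? EXIT_FAILURE : EXIT_SUCCESS;
}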
| @@ -7481,6 +7737,8 @@ out: | |||
| 7481 | 7737 | ||
| 7482 | static void __exit vmx_exit(void) | 7738 | static void __exit vmx_exit(void) |
| 7483 | { | 7739 | { |
| 7740 | free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
| 7741 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
| 7484 | free_page((unsigned long)vmx_msr_bitmap_legacy); | 7742 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
| 7485 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 7743 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
| 7486 | free_page((unsigned long)vmx_io_bitmap_b); | 7744 | free_page((unsigned long)vmx_io_bitmap_b); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76f54461f7cb..f71500af1f81 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -120,7 +120,7 @@ struct kvm_shared_msrs { | |||
| 120 | }; | 120 | }; |
| 121 | 121 | ||
| 122 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | 122 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; |
| 123 | static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); | 123 | static struct kvm_shared_msrs __percpu *shared_msrs; |
| 124 | 124 | ||
| 125 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 125 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
| 126 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 126 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
| @@ -191,10 +191,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn) | |||
| 191 | 191 | ||
| 192 | static void shared_msr_update(unsigned slot, u32 msr) | 192 | static void shared_msr_update(unsigned slot, u32 msr) |
| 193 | { | 193 | { |
| 194 | struct kvm_shared_msrs *smsr; | ||
| 195 | u64 value; | 194 | u64 value; |
| 195 | unsigned int cpu = smp_processor_id(); | ||
| 196 | struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); | ||
| 196 | 197 | ||
| 197 | smsr = &__get_cpu_var(shared_msrs); | ||
| 198 | /* only read, and nobody should modify it at this time, | 198 | /* only read, and nobody should modify it at this time, |
| 199 | * so don't need lock */ | 199 | * so don't need lock */ |
| 200 | if (slot >= shared_msrs_global.nr) { | 200 | if (slot >= shared_msrs_global.nr) { |
| @@ -226,7 +226,8 @@ static void kvm_shared_msr_cpu_online(void) | |||
| 226 | 226 | ||
| 227 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | 227 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) |
| 228 | { | 228 | { |
| 229 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | 229 | unsigned int cpu = smp_processor_id(); |
| 230 | struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); | ||
| 230 | 231 | ||
| 231 | if (((value ^ smsr->values[slot].curr) & mask) == 0) | 232 | if (((value ^ smsr->values[slot].curr) & mask) == 0) |
| 232 | return; | 233 | return; |
| @@ -242,7 +243,8 @@ EXPORT_SYMBOL_GPL(kvm_set_shared_msr); | |||
| 242 | 243 | ||
| 243 | static void drop_user_return_notifiers(void *ignore) | 244 | static void drop_user_return_notifiers(void *ignore) |
| 244 | { | 245 | { |
| 245 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | 246 | unsigned int cpu = smp_processor_id(); |
| 247 | struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); | ||
| 246 | 248 | ||
| 247 | if (smsr->registered) | 249 | if (smsr->registered) |
| 248 | kvm_on_user_return(&smsr->urn); | 250 | kvm_on_user_return(&smsr->urn); |
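With shared_msrs converted from a DEFINE_PER_CPU variable to an alloc_percpu() pointer, __get_cpu_var() no longer applies; each accessor now resolves its own CPU's slot with per_cpu_ptr(shared_msrs, smp_processor_id()), as the three hunks above show. A minimal kernel-style sketch of the pattern with a stand-in structure (not the kvm_shared_msrs code itself):

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/errno.h>

struct example_state {			/* stand-in for kvm_shared_msrs */
	bool registered;
};

static struct example_state __percpu *example_state;

static int example_alloc(void)
{
	example_state = alloc_percpu(struct example_state);
	return example_state ? 0 : -ENOMEM;
}

static void example_mark_this_cpu(void)
{
	/* callers are expected to run with preemption disabled */
	unsigned int cpu = smp_processor_id();
	struct example_state *st = per_cpu_ptr(example_state, cpu);

	st->registered = true;
}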
| @@ -870,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 870 | 872 | ||
| 871 | kvm_x86_ops->set_efer(vcpu, efer); | 873 | kvm_x86_ops->set_efer(vcpu, efer); |
| 872 | 874 | ||
| 873 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | ||
| 874 | |||
| 875 | /* Update reserved bits */ | 875 | /* Update reserved bits */ |
| 876 | if ((efer ^ old_efer) & EFER_NX) | 876 | if ((efer ^ old_efer) & EFER_NX) |
| 877 | kvm_mmu_reset_context(vcpu); | 877 | kvm_mmu_reset_context(vcpu); |
| @@ -1879,6 +1879,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 1879 | u64 data = msr_info->data; | 1879 | u64 data = msr_info->data; |
| 1880 | 1880 | ||
| 1881 | switch (msr) { | 1881 | switch (msr) { |
| 1882 | case MSR_AMD64_NB_CFG: | ||
| 1883 | case MSR_IA32_UCODE_REV: | ||
| 1884 | case MSR_IA32_UCODE_WRITE: | ||
| 1885 | case MSR_VM_HSAVE_PA: | ||
| 1886 | case MSR_AMD64_PATCH_LOADER: | ||
| 1887 | case MSR_AMD64_BU_CFG2: | ||
| 1888 | break; | ||
| 1889 | |||
| 1882 | case MSR_EFER: | 1890 | case MSR_EFER: |
| 1883 | return set_efer(vcpu, data); | 1891 | return set_efer(vcpu, data); |
| 1884 | case MSR_K7_HWCR: | 1892 | case MSR_K7_HWCR: |
| @@ -1898,8 +1906,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 1898 | return 1; | 1906 | return 1; |
| 1899 | } | 1907 | } |
| 1900 | break; | 1908 | break; |
| 1901 | case MSR_AMD64_NB_CFG: | ||
| 1902 | break; | ||
| 1903 | case MSR_IA32_DEBUGCTLMSR: | 1909 | case MSR_IA32_DEBUGCTLMSR: |
| 1904 | if (!data) { | 1910 | if (!data) { |
| 1905 | /* We support the non-activated case already */ | 1911 | /* We support the non-activated case already */ |
| @@ -1912,11 +1918,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 1912 | vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | 1918 | vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", |
| 1913 | __func__, data); | 1919 | __func__, data); |
| 1914 | break; | 1920 | break; |
| 1915 | case MSR_IA32_UCODE_REV: | ||
| 1916 | case MSR_IA32_UCODE_WRITE: | ||
| 1917 | case MSR_VM_HSAVE_PA: | ||
| 1918 | case MSR_AMD64_PATCH_LOADER: | ||
| 1919 | break; | ||
| 1920 | case 0x200 ... 0x2ff: | 1921 | case 0x200 ... 0x2ff: |
| 1921 | return set_msr_mtrr(vcpu, msr, data); | 1922 | return set_msr_mtrr(vcpu, msr, data); |
| 1922 | case MSR_IA32_APICBASE: | 1923 | case MSR_IA32_APICBASE: |
| @@ -2251,6 +2252,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 2251 | case MSR_K8_INT_PENDING_MSG: | 2252 | case MSR_K8_INT_PENDING_MSG: |
| 2252 | case MSR_AMD64_NB_CFG: | 2253 | case MSR_AMD64_NB_CFG: |
| 2253 | case MSR_FAM10H_MMIO_CONF_BASE: | 2254 | case MSR_FAM10H_MMIO_CONF_BASE: |
| 2255 | case MSR_AMD64_BU_CFG2: | ||
| 2254 | data = 0; | 2256 | data = 0; |
| 2255 | break; | 2257 | break; |
| 2256 | case MSR_P6_PERFCTR0: | 2258 | case MSR_P6_PERFCTR0: |
| @@ -2518,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 2518 | r = KVM_MAX_VCPUS; | 2520 | r = KVM_MAX_VCPUS; |
| 2519 | break; | 2521 | break; |
| 2520 | case KVM_CAP_NR_MEMSLOTS: | 2522 | case KVM_CAP_NR_MEMSLOTS: |
| 2521 | r = KVM_MEMORY_SLOTS; | 2523 | r = KVM_USER_MEM_SLOTS; |
| 2522 | break; | 2524 | break; |
| 2523 | case KVM_CAP_PV_MMU: /* obsolete */ | 2525 | case KVM_CAP_PV_MMU: /* obsolete */ |
| 2524 | r = 0; | 2526 | r = 0; |
| @@ -3270,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
| 3270 | return -EINVAL; | 3272 | return -EINVAL; |
| 3271 | 3273 | ||
| 3272 | mutex_lock(&kvm->slots_lock); | 3274 | mutex_lock(&kvm->slots_lock); |
| 3273 | spin_lock(&kvm->mmu_lock); | ||
| 3274 | 3275 | ||
| 3275 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 3276 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
| 3276 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 3277 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
| 3277 | 3278 | ||
| 3278 | spin_unlock(&kvm->mmu_lock); | ||
| 3279 | mutex_unlock(&kvm->slots_lock); | 3279 | mutex_unlock(&kvm->slots_lock); |
| 3280 | return 0; | 3280 | return 0; |
| 3281 | } | 3281 | } |
| @@ -3435,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
| 3435 | mutex_lock(&kvm->slots_lock); | 3435 | mutex_lock(&kvm->slots_lock); |
| 3436 | 3436 | ||
| 3437 | r = -EINVAL; | 3437 | r = -EINVAL; |
| 3438 | if (log->slot >= KVM_MEMORY_SLOTS) | 3438 | if (log->slot >= KVM_USER_MEM_SLOTS) |
| 3439 | goto out; | 3439 | goto out; |
| 3440 | 3440 | ||
| 3441 | memslot = id_to_memslot(kvm->memslots, log->slot); | 3441 | memslot = id_to_memslot(kvm->memslots, log->slot); |
| @@ -4491,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, | |||
| 4491 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); | 4491 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); |
| 4492 | *selector = var.selector; | 4492 | *selector = var.selector; |
| 4493 | 4493 | ||
| 4494 | if (var.unusable) | 4494 | if (var.unusable) { |
| 4495 | memset(desc, 0, sizeof(*desc)); | ||
| 4495 | return false; | 4496 | return false; |
| 4497 | } | ||
| 4496 | 4498 | ||
| 4497 | if (var.g) | 4499 | if (var.g) |
| 4498 | var.limit >>= 12; | 4500 | var.limit >>= 12; |
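Zeroing *desc on the unusable path is a small defensive fix: the out-parameter should never carry uninitialized stack contents in case a caller inspects it despite the false return. A generic, self-contained sketch of the idiom (the types are hypothetical):

#include <stdbool.h>
#include <string.h>

struct seg_desc {			/* hypothetical stand-in */
	unsigned int base, limit, type;
};

static bool get_seg_desc(struct seg_desc *desc, bool unusable)
{
	if (unusable) {
		/* never hand back uninitialized stack contents */
		memset(desc, 0, sizeof(*desc));
		return false;
	}

	desc->base = 0;
	desc->limit = 0xfffff;
	desc->type = 0xb;		/* example: execute/read code segment */
	return true;
}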
| @@ -4753,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
| 4753 | return r; | 4755 | return r; |
| 4754 | } | 4756 | } |
| 4755 | 4757 | ||
| 4756 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | 4758 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, |
| 4759 | bool write_fault_to_shadow_pgtable) | ||
| 4757 | { | 4760 | { |
| 4758 | gpa_t gpa; | 4761 | gpa_t gpa = cr2; |
| 4759 | pfn_t pfn; | 4762 | pfn_t pfn; |
| 4760 | 4763 | ||
| 4761 | if (tdp_enabled) | 4764 | if (!vcpu->arch.mmu.direct_map) { |
| 4762 | return false; | 4765 | /* |
| 4763 | 4766 | * Write permission should be allowed since only | |
| 4764 | /* | 4767 | * write accesses need to be emulated. |
| 4765 | * if emulation was due to access to shadowed page table | 4768 | */ |
| 4766 | * and it failed try to unshadow page and re-enter the | 4769 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); |
| 4767 | * guest to let CPU execute the instruction. | ||
| 4768 | */ | ||
| 4769 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | ||
| 4770 | return true; | ||
| 4771 | |||
| 4772 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); | ||
| 4773 | 4770 | ||
| 4774 | if (gpa == UNMAPPED_GVA) | 4771 | /* |
| 4775 | return true; /* let cpu generate fault */ | 4772 | * If the mapping is invalid in the guest, let the CPU |
| 4773 | * retry it and generate the fault. | ||
| 4774 | */ | ||
| 4775 | if (gpa == UNMAPPED_GVA) | ||
| 4776 | return true; | ||
| 4777 | } | ||
| 4776 | 4778 | ||
| 4777 | /* | 4779 | /* |
| 4778 | * Do not retry the unhandleable instruction if it faults on the | 4780 | * Do not retry the unhandleable instruction if it faults on the |
| @@ -4781,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 4781 | * instruction -> ... | 4783 | * instruction -> ... |
| 4782 | */ | 4784 | */ |
| 4783 | pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); | 4785 | pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); |
| 4784 | if (!is_error_noslot_pfn(pfn)) { | 4786 | |
| 4785 | kvm_release_pfn_clean(pfn); | 4787 | /* |
| 4788 | * If the instruction failed on an error pfn, it cannot be fixed; | ||
| 4789 | * report the error to userspace. | ||
| 4790 | */ | ||
| 4791 | if (is_error_noslot_pfn(pfn)) | ||
| 4792 | return false; | ||
| 4793 | |||
| 4794 | kvm_release_pfn_clean(pfn); | ||
| 4795 | |||
| 4796 | /* The instructions are well-emulated on direct mmu. */ | ||
| 4797 | if (vcpu->arch.mmu.direct_map) { | ||
| 4798 | unsigned int indirect_shadow_pages; | ||
| 4799 | |||
| 4800 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 4801 | indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; | ||
| 4802 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 4803 | |||
| 4804 | if (indirect_shadow_pages) | ||
| 4805 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); | ||
| 4806 | |||
| 4786 | return true; | 4807 | return true; |
| 4787 | } | 4808 | } |
| 4788 | 4809 | ||
| 4789 | return false; | 4810 | /* |
| 4811 | * if emulation was due to access to shadowed page table | ||
| 4812 | * and it failed try to unshadow page and re-enter the | ||
| 4813 | * guest to let CPU execute the instruction. | ||
| 4814 | */ | ||
| 4815 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); | ||
| 4816 | |||
| 4817 | /* | ||
| 4818 | * If the access faults on its own page table, it cannot | ||
| 4819 | * be fixed by unprotecting the shadow page; it should | ||
| 4820 | * be reported to userspace. | ||
| 4821 | */ | ||
| 4822 | return !write_fault_to_shadow_pgtable; | ||
| 4790 | } | 4823 | } |
| 4791 | 4824 | ||
| 4792 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | 4825 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, |
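Condensed, the rewritten retry policy above can be read as the following decision function (illustrative only, not the in-tree helper):

#include <stdbool.h>

static bool should_reexecute(bool direct_map, bool unmapped_gva,
			     bool error_pfn,
			     bool write_fault_to_shadow_pgtable)
{
	if (!direct_map && unmapped_gva)
		return true;	/* let the guest refault on the bad mapping */
	if (error_pfn)
		return false;	/* unfixable; report to userspace */
	if (direct_map)
		return true;	/* unshadow (if needed) and retry */
	/* shadow paging: retry unless the fault hit the page tables */
	return !write_fault_to_shadow_pgtable;
}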
| @@ -4828,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
| 4828 | if (!vcpu->arch.mmu.direct_map) | 4861 | if (!vcpu->arch.mmu.direct_map) |
| 4829 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); | 4862 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); |
| 4830 | 4863 | ||
| 4831 | kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 4864 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); |
| 4832 | 4865 | ||
| 4833 | return true; | 4866 | return true; |
| 4834 | } | 4867 | } |
| @@ -4845,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 4845 | int r; | 4878 | int r; |
| 4846 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4879 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
| 4847 | bool writeback = true; | 4880 | bool writeback = true; |
| 4881 | bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; | ||
| 4848 | 4882 | ||
| 4883 | /* | ||
| 4884 | * Clear write_fault_to_shadow_pgtable here to ensure it is | ||
| 4885 | * never reused. | ||
| 4886 | */ | ||
| 4887 | vcpu->arch.write_fault_to_shadow_pgtable = false; | ||
| 4849 | kvm_clear_exception_queue(vcpu); | 4888 | kvm_clear_exception_queue(vcpu); |
| 4850 | 4889 | ||
| 4851 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4890 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
| @@ -4864,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 4864 | if (r != EMULATION_OK) { | 4903 | if (r != EMULATION_OK) { |
| 4865 | if (emulation_type & EMULTYPE_TRAP_UD) | 4904 | if (emulation_type & EMULTYPE_TRAP_UD) |
| 4866 | return EMULATE_FAIL; | 4905 | return EMULATE_FAIL; |
| 4867 | if (reexecute_instruction(vcpu, cr2)) | 4906 | if (reexecute_instruction(vcpu, cr2, |
| 4907 | write_fault_to_spt)) | ||
| 4868 | return EMULATE_DONE; | 4908 | return EMULATE_DONE; |
| 4869 | if (emulation_type & EMULTYPE_SKIP) | 4909 | if (emulation_type & EMULTYPE_SKIP) |
| 4870 | return EMULATE_FAIL; | 4910 | return EMULATE_FAIL; |
| @@ -4894,7 +4934,7 @@ restart: | |||
| 4894 | return EMULATE_DONE; | 4934 | return EMULATE_DONE; |
| 4895 | 4935 | ||
| 4896 | if (r == EMULATION_FAILED) { | 4936 | if (r == EMULATION_FAILED) { |
| 4897 | if (reexecute_instruction(vcpu, cr2)) | 4937 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) |
| 4898 | return EMULATE_DONE; | 4938 | return EMULATE_DONE; |
| 4899 | 4939 | ||
| 4900 | return handle_emulation_failure(vcpu); | 4940 | return handle_emulation_failure(vcpu); |
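The write_fault_to_shadow_pgtable flag is snapshotted into write_fault_to_spt and cleared up front, so a nested path that reaches x86_emulate_instruction again cannot observe a stale value from a previous fault. A generic sketch of that snapshot-then-clear idiom (the structure is hypothetical):

#include <stdbool.h>

struct emul_state {			/* hypothetical stand-in */
	bool write_fault_to_shadow_pgtable;
};

static bool consume_write_fault_flag(struct emul_state *st)
{
	bool write_fault = st->write_fault_to_shadow_pgtable;

	/* clear eagerly so a re-entrant path never sees a stale value */
	st->write_fault_to_shadow_pgtable = false;
	return write_fault;
}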
| @@ -5233,9 +5273,16 @@ int kvm_arch_init(void *opaque) | |||
| 5233 | goto out; | 5273 | goto out; |
| 5234 | } | 5274 | } |
| 5235 | 5275 | ||
| 5276 | r = -ENOMEM; | ||
| 5277 | shared_msrs = alloc_percpu(struct kvm_shared_msrs); | ||
| 5278 | if (!shared_msrs) { | ||
| 5279 | printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n"); | ||
| 5280 | goto out; | ||
| 5281 | } | ||
| 5282 | |||
| 5236 | r = kvm_mmu_module_init(); | 5283 | r = kvm_mmu_module_init(); |
| 5237 | if (r) | 5284 | if (r) |
| 5238 | goto out; | 5285 | goto out_free_percpu; |
| 5239 | 5286 | ||
| 5240 | kvm_set_mmio_spte_mask(); | 5287 | kvm_set_mmio_spte_mask(); |
| 5241 | kvm_init_msr_list(); | 5288 | kvm_init_msr_list(); |
| @@ -5258,6 +5305,8 @@ int kvm_arch_init(void *opaque) | |||
| 5258 | 5305 | ||
| 5259 | return 0; | 5306 | return 0; |
| 5260 | 5307 | ||
| 5308 | out_free_percpu: | ||
| 5309 | free_percpu(shared_msrs); | ||
| 5261 | out: | 5310 | out: |
| 5262 | return r; | 5311 | return r; |
| 5263 | } | 5312 | } |
| @@ -5275,6 +5324,7 @@ void kvm_arch_exit(void) | |||
| 5275 | #endif | 5324 | #endif |
| 5276 | kvm_x86_ops = NULL; | 5325 | kvm_x86_ops = NULL; |
| 5277 | kvm_mmu_module_exit(); | 5326 | kvm_mmu_module_exit(); |
| 5327 | free_percpu(shared_msrs); | ||
| 5278 | } | 5328 | } |
| 5279 | 5329 | ||
| 5280 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | 5330 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) |
| @@ -5527,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
| 5527 | vcpu->arch.nmi_injected = true; | 5577 | vcpu->arch.nmi_injected = true; |
| 5528 | kvm_x86_ops->set_nmi(vcpu); | 5578 | kvm_x86_ops->set_nmi(vcpu); |
| 5529 | } | 5579 | } |
| 5530 | } else if (kvm_cpu_has_interrupt(vcpu)) { | 5580 | } else if (kvm_cpu_has_injectable_intr(vcpu)) { |
| 5531 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { | 5581 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { |
| 5532 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), | 5582 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), |
| 5533 | false); | 5583 | false); |
| @@ -5595,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
| 5595 | #endif | 5645 | #endif |
| 5596 | } | 5646 | } |
| 5597 | 5647 | ||
| 5648 | static void update_eoi_exitmap(struct kvm_vcpu *vcpu) | ||
| 5649 | { | ||
| 5650 | u64 eoi_exit_bitmap[4]; | ||
| 5651 | |||
| 5652 | memset(eoi_exit_bitmap, 0, 32); | ||
| 5653 | |||
| 5654 | kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); | ||
| 5655 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | ||
| 5656 | } | ||
| 5657 | |||
| 5598 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5658 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| 5599 | { | 5659 | { |
| 5600 | int r; | 5660 | int r; |
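update_eoi_exitmap() above works on a 256-bit map, one bit per interrupt vector, stored as four u64 words (hence the 32-byte memset). A small illustrative pair of helpers for that layout; the names are not the kernel's:

#include <stdint.h>
#include <string.h>

static void eoi_exitmap_clear(uint64_t bitmap[4])
{
	memset(bitmap, 0, 4 * sizeof(uint64_t));	/* 32 bytes, 256 bits */
}

static void eoi_exitmap_set_vector(uint64_t bitmap[4], uint8_t vector)
{
	/* request a VM exit when the guest EOIs this vector */
	bitmap[vector / 64] |= 1ULL << (vector % 64);
}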
| @@ -5648,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5648 | kvm_handle_pmu_event(vcpu); | 5708 | kvm_handle_pmu_event(vcpu); |
| 5649 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 5709 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
| 5650 | kvm_deliver_pmi(vcpu); | 5710 | kvm_deliver_pmi(vcpu); |
| 5711 | if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) | ||
| 5712 | update_eoi_exitmap(vcpu); | ||
| 5651 | } | 5713 | } |
| 5652 | 5714 | ||
| 5653 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5715 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
| @@ -5656,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5656 | /* enable NMI/IRQ window open exits if needed */ | 5718 | /* enable NMI/IRQ window open exits if needed */ |
| 5657 | if (vcpu->arch.nmi_pending) | 5719 | if (vcpu->arch.nmi_pending) |
| 5658 | kvm_x86_ops->enable_nmi_window(vcpu); | 5720 | kvm_x86_ops->enable_nmi_window(vcpu); |
| 5659 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5721 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
| 5660 | kvm_x86_ops->enable_irq_window(vcpu); | 5722 | kvm_x86_ops->enable_irq_window(vcpu); |
| 5661 | 5723 | ||
| 5662 | if (kvm_lapic_enabled(vcpu)) { | 5724 | if (kvm_lapic_enabled(vcpu)) { |
| 5725 | /* | ||
| 5726 | * Update architecture specific hints for APIC | ||
| 5727 | * virtual interrupt delivery. | ||
| 5728 | */ | ||
| 5729 | if (kvm_x86_ops->hwapic_irr_update) | ||
| 5730 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
| 5731 | kvm_lapic_find_highest_irr(vcpu)); | ||
| 5663 | update_cr8_intercept(vcpu); | 5732 | update_cr8_intercept(vcpu); |
| 5664 | kvm_lapic_sync_to_vapic(vcpu); | 5733 | kvm_lapic_sync_to_vapic(vcpu); |
| 5665 | } | 5734 | } |
| @@ -6839,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 6839 | struct kvm_memory_slot *memslot, | 6908 | struct kvm_memory_slot *memslot, |
| 6840 | struct kvm_memory_slot old, | 6909 | struct kvm_memory_slot old, |
| 6841 | struct kvm_userspace_memory_region *mem, | 6910 | struct kvm_userspace_memory_region *mem, |
| 6842 | int user_alloc) | 6911 | bool user_alloc) |
| 6843 | { | 6912 | { |
| 6844 | int npages = memslot->npages; | 6913 | int npages = memslot->npages; |
| 6845 | int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; | ||
| 6846 | |||
| 6847 | /* Prevent internal slot pages from being moved by fork()/COW. */ | ||
| 6848 | if (memslot->id >= KVM_MEMORY_SLOTS) | ||
| 6849 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | ||
| 6850 | 6914 | ||
| 6851 | /*To keep backward compatibility with older userspace, | 6915 | /* |
| 6852 | *x86 needs to handle !user_alloc case. | 6916 | * Only private memory slots need to be mapped here since |
| 6917 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. | ||
| 6853 | */ | 6918 | */ |
| 6854 | if (!user_alloc) { | 6919 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { |
| 6855 | if (npages && !old.npages) { | 6920 | unsigned long userspace_addr; |
| 6856 | unsigned long userspace_addr; | ||
| 6857 | 6921 | ||
| 6858 | userspace_addr = vm_mmap(NULL, 0, | 6922 | /* |
| 6859 | npages * PAGE_SIZE, | 6923 | * MAP_SHARED to prevent internal slot pages from being moved |
| 6860 | PROT_READ | PROT_WRITE, | 6924 | * by fork()/COW. |
| 6861 | map_flags, | 6925 | */ |
| 6862 | 0); | 6926 | userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, |
| 6927 | PROT_READ | PROT_WRITE, | ||
| 6928 | MAP_SHARED | MAP_ANONYMOUS, 0); | ||
| 6863 | 6929 | ||
| 6864 | if (IS_ERR((void *)userspace_addr)) | 6930 | if (IS_ERR((void *)userspace_addr)) |
| 6865 | return PTR_ERR((void *)userspace_addr); | 6931 | return PTR_ERR((void *)userspace_addr); |
| 6866 | 6932 | ||
| 6867 | memslot->userspace_addr = userspace_addr; | 6933 | memslot->userspace_addr = userspace_addr; |
| 6868 | } | ||
| 6869 | } | 6934 | } |
| 6870 | 6935 | ||
| 6871 | |||
| 6872 | return 0; | 6936 | return 0; |
| 6873 | } | 6937 | } |
| 6874 | 6938 | ||
| 6875 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 6939 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 6876 | struct kvm_userspace_memory_region *mem, | 6940 | struct kvm_userspace_memory_region *mem, |
| 6877 | struct kvm_memory_slot old, | 6941 | struct kvm_memory_slot old, |
| 6878 | int user_alloc) | 6942 | bool user_alloc) |
| 6879 | { | 6943 | { |
| 6880 | 6944 | ||
| 6881 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; | 6945 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
| 6882 | 6946 | ||
| 6883 | if (!user_alloc && !old.user_alloc && old.npages && !npages) { | 6947 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { |
| 6884 | int ret; | 6948 | int ret; |
| 6885 | 6949 | ||
| 6886 | ret = vm_munmap(old.userspace_addr, | 6950 | ret = vm_munmap(old.userspace_addr, |
| @@ -6894,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
| 6894 | if (!kvm->arch.n_requested_mmu_pages) | 6958 | if (!kvm->arch.n_requested_mmu_pages) |
| 6895 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 6959 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
| 6896 | 6960 | ||
| 6897 | spin_lock(&kvm->mmu_lock); | ||
| 6898 | if (nr_mmu_pages) | 6961 | if (nr_mmu_pages) |
| 6899 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6962 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
| 6900 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6963 | /* |
| 6901 | spin_unlock(&kvm->mmu_lock); | 6964 | * Write protect all pages for dirty logging. |
| 6965 | * Existing largepage mappings are destroyed here and new ones will | ||
| 6966 | * not be created until the end of the logging. | ||
| 6967 | */ | ||
| 6968 | if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | ||
| 6969 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | ||
| 6902 | /* | 6970 | /* |
| 6903 | * If memory slot is created, or moved, we need to clear all | 6971 | * If memory slot is created, or moved, we need to clear all |
| 6904 | * mmio sptes. | 6972 | * mmio sptes. |
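The two memory-region hunks pair up: prepare now maps backing for internal slots (id >= KVM_USER_MEM_SLOTS) with MAP_SHARED | MAP_ANONYMOUS, and commit unmaps it when such a slot is deleted. A userspace-flavoured sketch of the same mmap()/munmap() lifecycle, illustrative rather than the kernel's vm_mmap()/vm_munmap() helpers:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define SLOT_PAGES 16UL			/* hypothetical slot size */

int main(void)
{
	size_t len = SLOT_PAGES * (size_t)sysconf(_SC_PAGESIZE);

	/* MAP_SHARED keeps the pages from being duplicated by fork()/COW */
	void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* ... the slot would be in use here ... */

	if (munmap(addr, len)) {
		perror("munmap");
		return 1;
	}
	return 0;
}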
