 Documentation/virtual/kvm/api.txt   |   34
 arch/ia64/kvm/vti.h                 |   26
 arch/powerpc/include/asm/kvm.h      |  184
 arch/powerpc/include/asm/kvm_44x.h  |    1
 arch/powerpc/include/asm/kvm_e500.h |    2
 arch/powerpc/include/asm/kvm_host.h |    5
 arch/powerpc/include/asm/kvm_ppc.h  |    9
 arch/powerpc/kernel/asm-offsets.c   |    1
 arch/powerpc/kvm/44x.c              |   10
 arch/powerpc/kvm/44x_emulate.c      |    2
 arch/powerpc/kvm/booke.c            |  154
 arch/powerpc/kvm/booke_interrupts.S |    1
 arch/powerpc/kvm/e500.c             |   76
 arch/powerpc/kvm/e500_emulate.c     |    7
 arch/powerpc/kvm/e500_tlb.c         |   13
 arch/powerpc/kvm/emulate.c          |   15
 arch/powerpc/kvm/powerpc.c          |   21
 arch/powerpc/kvm/timing.c           |   31
 arch/x86/include/asm/kvm_emulate.h  |  193
 arch/x86/include/asm/kvm_host.h     |   55
 arch/x86/include/asm/msr-index.h    |    1
 arch/x86/kvm/emulate.c              | 1754
 arch/x86/kvm/i8254.h                |    2
 arch/x86/kvm/irq.h                  |    2
 arch/x86/kvm/mmu.c                  |   16
 arch/x86/kvm/paging_tmpl.h          |   83
 arch/x86/kvm/svm.c                  |  585
 arch/x86/kvm/vmx.c                  |  228
 arch/x86/kvm/x86.c                  |  570
 arch/x86/kvm/x86.h                  |    2
 include/linux/kvm.h                 |    6
 include/linux/kvm_host.h            |   30
 virt/kvm/ioapic.c                   |    2
 virt/kvm/kvm_main.c                 |   26
 34 files changed, 3054 insertions(+), 1093 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9bef4e4cec50..42542eb802ca 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -175,7 +175,10 @@ Parameters: vcpu id (apic id on x86)
 Returns: vcpu fd on success, -1 on error
 
 This API adds a vcpu to a virtual machine. The vcpu id is a small integer
-in the range [0, max_vcpus).
+in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the
+KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
+If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
+cpus max.
 
 4.8 KVM_GET_DIRTY_LOG (vm ioctl)
 
@@ -261,7 +264,7 @@ See KVM_GET_REGS for the data structure.
 4.13 KVM_GET_SREGS
 
 Capability: basic
-Architectures: x86
+Architectures: x86, ppc
 Type: vcpu ioctl
 Parameters: struct kvm_sregs (out)
 Returns: 0 on success, -1 on error
@@ -279,6 +282,8 @@ struct kvm_sregs {
 	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
 };
 
+/* ppc -- see arch/powerpc/include/asm/kvm.h */
+
 interrupt_bitmap is a bitmap of pending external interrupts. At most
 one bit may be set. This interrupt has been acknowledged by the APIC
 but not yet injected into the cpu core.
@@ -286,7 +291,7 @@ but not yet injected into the cpu core.
 4.14 KVM_SET_SREGS
 
 Capability: basic
-Architectures: x86
+Architectures: x86, ppc
 Type: vcpu ioctl
 Parameters: struct kvm_sregs (in)
 Returns: 0 on success, -1 on error
@@ -1263,6 +1268,29 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+4.54 KVM_SET_TSC_KHZ
+
+Capability: KVM_CAP_TSC_CONTROL
+Architectures: x86
+Type: vcpu ioctl
+Parameters: virtual tsc_khz
+Returns: 0 on success, -1 on error
+
+Specifies the tsc frequency for the virtual machine. The unit of the
+frequency is KHz.
+
+4.55 KVM_GET_TSC_KHZ
+
+Capability: KVM_CAP_GET_TSC_KHZ
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: virtual tsc-khz on success, negative value on error
+
+Returns the tsc frequency of the guest. The unit of the return value is
+KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
+error.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
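
The capability probe and the two TSC ioctls documented above can be driven
from userspace roughly as follows (a minimal sketch, not part of the patch;
it assumes kvm_fd is an open /dev/kvm descriptor and vcpu_fd came from
KVM_CREATE_VCPU):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int query_max_vcpus(int kvm_fd)
	{
		int n = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);

		/* Per the text above: if the capability does not
		 * exist, assume at most 4 vcpus. */
		return n > 0 ? n : 4;
	}

	static int set_guest_tsc_khz(int vcpu_fd, unsigned long tsc_khz)
	{
		/* Requires KVM_CAP_TSC_CONTROL; 0 on success, -1 on error. */
		return ioctl(vcpu_fd, KVM_SET_TSC_KHZ, tsc_khz);
	}

	static long get_guest_tsc_khz(int vcpu_fd)
	{
		/* Positive tsc-khz on success; fails (e.g. with EIO)
		 * when the host tsc is unstable. */
		return ioctl(vcpu_fd, KVM_GET_TSC_KHZ);
	}
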
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
index f6c5617e16af..b214b5b0432d 100644
--- a/arch/ia64/kvm/vti.h
+++ b/arch/ia64/kvm/vti.h
@@ -83,13 +83,13 @@
 union vac {
 	unsigned long value;
 	struct {
-		int a_int:1;
-		int a_from_int_cr:1;
-		int a_to_int_cr:1;
-		int a_from_psr:1;
-		int a_from_cpuid:1;
-		int a_cover:1;
-		int a_bsw:1;
+		unsigned int a_int:1;
+		unsigned int a_from_int_cr:1;
+		unsigned int a_to_int_cr:1;
+		unsigned int a_from_psr:1;
+		unsigned int a_from_cpuid:1;
+		unsigned int a_cover:1;
+		unsigned int a_bsw:1;
 		long reserved:57;
 	};
 };
@@ -97,12 +97,12 @@ union vac {
 union vdc {
 	unsigned long value;
 	struct {
-		int d_vmsw:1;
-		int d_extint:1;
-		int d_ibr_dbr:1;
-		int d_pmc:1;
-		int d_to_pmd:1;
-		int d_itm:1;
+		unsigned int d_vmsw:1;
+		unsigned int d_extint:1;
+		unsigned int d_ibr_dbr:1;
+		unsigned int d_pmc:1;
+		unsigned int d_to_pmd:1;
+		unsigned int d_itm:1;
 		long reserved:58;
 	};
 };
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 18ea6963ad77..d2ca5ed3877b 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -45,6 +45,114 @@ struct kvm_regs {
 	__u64 gpr[32];
 };
 
+#define KVM_SREGS_E_IMPL_NONE	0
+#define KVM_SREGS_E_IMPL_FSL	1
+
+#define KVM_SREGS_E_FSL_PIDn	(1 << 0) /* PID1/PID2 */
+
+/*
+ * Feature bits indicate which sections of the sregs struct are valid,
+ * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
+ * corresponding to unset feature bits will not be modified. This allows
+ * restoring a checkpoint made without that feature, while keeping the
+ * default values of the new registers.
+ *
+ * KVM_SREGS_E_BASE contains:
+ * CSRR0/1 (refers to SRR2/3 on 40x)
+ * ESR
+ * DEAR
+ * MCSR
+ * TSR
+ * TCR
+ * DEC
+ * TB
+ * VRSAVE (USPRG0)
+ */
+#define KVM_SREGS_E_BASE		(1 << 0)
+
+/*
+ * KVM_SREGS_E_ARCH206 contains:
+ *
+ * PIR
+ * MCSRR0/1
+ * DECAR
+ * IVPR
+ */
+#define KVM_SREGS_E_ARCH206		(1 << 1)
+
+/*
+ * Contains EPCR, plus the upper half of 64-bit registers
+ * that are 32-bit on 32-bit implementations.
+ */
+#define KVM_SREGS_E_64			(1 << 2)
+
+#define KVM_SREGS_E_SPRG8		(1 << 3)
+#define KVM_SREGS_E_MCIVPR		(1 << 4)
+
+/*
+ * IVORs are used -- contains IVOR0-15, plus additional IVORs
+ * in combination with an appropriate feature bit.
+ */
+#define KVM_SREGS_E_IVOR		(1 << 5)
+
+/*
+ * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
+ * Also TLBnPS if MMUCFG[MAVN] = 1.
+ */
+#define KVM_SREGS_E_ARCH206_MMU		(1 << 6)
+
+/* DBSR, DBCR, IAC, DAC, DVC */
+#define KVM_SREGS_E_DEBUG		(1 << 7)
+
+/* Enhanced debug -- DSRR0/1, SPRG9 */
+#define KVM_SREGS_E_ED			(1 << 8)
+
+/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_SPE			(1 << 9)
+
+/* External Proxy (EXP) -- EPR */
+#define KVM_SREGS_EXP			(1 << 10)
+
+/* External PID (E.PD) -- EPSC/EPLC */
+#define KVM_SREGS_E_PD			(1 << 11)
+
+/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PC			(1 << 12)
+
+/* Page table (E.PT) -- EPTCFG */
+#define KVM_SREGS_E_PT			(1 << 13)
+
+/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PM			(1 << 14)
+
+/*
+ * Special updates:
+ *
+ * Some registers may change even while a vcpu is not running.
+ * To avoid losing these changes, by default these registers are
+ * not updated by KVM_SET_SREGS. To force an update, set the bit
+ * in u.e.update_special corresponding to the register to be updated.
+ *
+ * The update_special field is zero on return from KVM_GET_SREGS.
+ *
+ * When restoring a checkpoint, the caller can set update_special
+ * to 0xffffffff to ensure that everything is restored, even new features
+ * that the caller doesn't know about.
+ */
+#define KVM_SREGS_E_UPDATE_MCSR		(1 << 0)
+#define KVM_SREGS_E_UPDATE_TSR		(1 << 1)
+#define KVM_SREGS_E_UPDATE_DEC		(1 << 2)
+#define KVM_SREGS_E_UPDATE_DBSR		(1 << 3)
+
+/*
+ * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
+ * previous KVM_GET_REGS.
+ *
+ * Unless otherwise indicated, setting any register with KVM_SET_SREGS
+ * directly sets its value. It does not trigger any special semantics such
+ * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct
+ * just received from KVM_GET_SREGS is always a no-op.
+ */
 struct kvm_sregs {
 	__u32 pvr;
 	union {
@@ -62,6 +170,82 @@ struct kvm_sregs {
 			__u64 dbat[8];
 		} ppc32;
 	} s;
+	struct {
+		union {
+			struct { /* KVM_SREGS_E_IMPL_FSL */
+				__u32 features; /* KVM_SREGS_E_FSL_ */
+				__u32 svr;
+				__u64 mcar;
+				__u32 hid0;
+
+				/* KVM_SREGS_E_FSL_PIDn */
+				__u32 pid1, pid2;
+			} fsl;
+			__u8 pad[256];
+		} impl;
+
+		__u32 features; /* KVM_SREGS_E_ */
+		__u32 impl_id;	/* KVM_SREGS_E_IMPL_ */
+		__u32 update_special; /* KVM_SREGS_E_UPDATE_ */
+		__u32 pir;	/* read-only */
+		__u64 sprg8;
+		__u64 sprg9;	/* E.ED */
+		__u64 csrr0;
+		__u64 dsrr0;	/* E.ED */
+		__u64 mcsrr0;
+		__u32 csrr1;
+		__u32 dsrr1;	/* E.ED */
+		__u32 mcsrr1;
+		__u32 esr;
+		__u64 dear;
+		__u64 ivpr;
+		__u64 mcivpr;
+		__u64 mcsr;	/* KVM_SREGS_E_UPDATE_MCSR */
+
+		__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
+		__u32 tcr;
+		__u32 decar;
+		__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
+
+		/*
+		 * Userspace can read TB directly, but the
+		 * value reported here is consistent with "dec".
+		 *
+		 * Read-only.
+		 */
+		__u64 tb;
+
+		__u32 dbsr;	/* KVM_SREGS_E_UPDATE_DBSR */
+		__u32 dbcr[3];
+		__u32 iac[4];
+		__u32 dac[2];
+		__u32 dvc[2];
+		__u8 num_iac;	/* read-only */
+		__u8 num_dac;	/* read-only */
+		__u8 num_dvc;	/* read-only */
+		__u8 pad;
+
+		__u32 epr;	/* EXP */
+		__u32 vrsave;	/* a.k.a. USPRG0 */
+		__u32 epcr;	/* KVM_SREGS_E_64 */
+
+		__u32 mas0;
+		__u32 mas1;
+		__u64 mas2;
+		__u64 mas7_3;
+		__u32 mas4;
+		__u32 mas6;
+
+		__u32 ivor_low[16]; /* IVOR0-15 */
+		__u32 ivor_high[18]; /* IVOR32+, plus room to expand */
+
+		__u32 mmucfg;	/* read-only */
+		__u32 eptcfg;	/* E.PT, read-only */
+		__u32 tlbcfg[4];/* read-only */
+		__u32 tlbps[4]; /* read-only */
+
+		__u32 eplc, epsc; /* E.PD */
+	} e;
 	__u8 pad[1020];
 	} u;
 };
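
Taken together, the feature bits and update_special give userspace a
checkpoint/restore protocol. A minimal restore sketch under those rules
(not part of the patch; vcpu_fd is a vcpu descriptor and "saved" was
filled in earlier by KVM_GET_SREGS):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int restore_booke_sregs(int vcpu_fd, struct kvm_sregs *saved)
	{
		/* Only the sections named here are written back;
		 * registers behind unset feature bits keep their
		 * current (default) values. */
		saved->u.e.features = KVM_SREGS_E_BASE |
				      KVM_SREGS_E_ARCH206 |
				      KVM_SREGS_E_IVOR;

		/* Force the "special" registers (MCSR/TSR/DEC/DBSR)
		 * to be written too; per the comment above, setting
		 * all bits is safe even for unknown features. */
		saved->u.e.update_special = 0xffffffff;

		return ioctl(vcpu_fd, KVM_SET_SREGS, saved);
	}
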
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index d22d39942a92..a0e57618ff33 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
 }
 
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
 void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
 void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 7fea26fffb25..7a2a565f88c4 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 {
 
 	u32 host_pid[E500_PID_NUM];
 	u32 pid[E500_PID_NUM];
+	u32 svr;
 
 	u32 mas0;
 	u32 mas1;
@@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 {
 	u32 hid1;
 	u32 tlb0cfg;
 	u32 tlb1cfg;
+	u64 mcar;
 
 	struct kvm_vcpu vcpu;
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bba3b9b72a39..186f150b9b89 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -223,6 +223,7 @@ struct kvm_vcpu_arch {
 	ulong hflags;
 	ulong guest_owned_ext;
 #endif
+	u32 vrsave; /* also USPRG0 */
 	u32 mmucr;
 	ulong sprg4;
 	ulong sprg5;
@@ -232,6 +233,9 @@ struct kvm_vcpu_arch {
 	ulong csrr1;
 	ulong dsrr0;
 	ulong dsrr1;
+	ulong mcsrr0;
+	ulong mcsrr1;
+	ulong mcsr;
 	ulong esr;
 	u32 dec;
 	u32 decar;
@@ -255,6 +259,7 @@ struct kvm_vcpu_arch {
 	u32 dbsr;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
+	struct mutex exit_timing_lock;
 	struct kvmppc_exit_timing timing_exit;
 	struct kvmppc_exit_timing timing_last_enter;
 	u32 last_exit_type;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ecb3bc74c344..9345238edecf 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
+extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 
 /* Core-specific hooks */
 
@@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
 	return r;
 }
 
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6887661ac072..36e1c8a29be8 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -396,6 +396,7 @@ int main(void)
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+	DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
 	DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
 	DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
 	DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 74d0e7421143..da3a1225c0ac 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 65ea083a5b27..549bb2c9a47a 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
 	}
 
-	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
 	return emulated;
 }
 
@@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
 	}
 
-	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
 	return emulated;
 }
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ef76acb455c3..8462b3a1c1c7 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	kvmppc_set_msr(vcpu, regs->msr);
 	vcpu->arch.shared->srr0 = regs->srr0;
 	vcpu->arch.shared->srr1 = regs->srr1;
+	kvmppc_set_pid(vcpu, regs->pid);
 	vcpu->arch.shared->sprg0 = regs->sprg0;
 	vcpu->arch.shared->sprg1 = regs->sprg1;
 	vcpu->arch.shared->sprg2 = regs->sprg2;
@@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+static void get_sregs_base(struct kvm_vcpu *vcpu,
+                           struct kvm_sregs *sregs)
+{
+	u64 tb = get_tb();
+
+	sregs->u.e.features |= KVM_SREGS_E_BASE;
+
+	sregs->u.e.csrr0 = vcpu->arch.csrr0;
+	sregs->u.e.csrr1 = vcpu->arch.csrr1;
+	sregs->u.e.mcsr = vcpu->arch.mcsr;
+	sregs->u.e.esr = vcpu->arch.esr;
+	sregs->u.e.dear = vcpu->arch.shared->dar;
+	sregs->u.e.tsr = vcpu->arch.tsr;
+	sregs->u.e.tcr = vcpu->arch.tcr;
+	sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
+	sregs->u.e.tb = tb;
+	sregs->u.e.vrsave = vcpu->arch.vrsave;
+}
+
+static int set_sregs_base(struct kvm_vcpu *vcpu,
+                          struct kvm_sregs *sregs)
+{
+	if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
+		return 0;
+
+	vcpu->arch.csrr0 = sregs->u.e.csrr0;
+	vcpu->arch.csrr1 = sregs->u.e.csrr1;
+	vcpu->arch.mcsr = sregs->u.e.mcsr;
+	vcpu->arch.esr = sregs->u.e.esr;
+	vcpu->arch.shared->dar = sregs->u.e.dear;
+	vcpu->arch.vrsave = sregs->u.e.vrsave;
+	vcpu->arch.tcr = sregs->u.e.tcr;
+
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
+		vcpu->arch.dec = sregs->u.e.dec;
+
+	kvmppc_emulate_dec(vcpu);
+
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+		/*
+		 * FIXME: existing KVM timer handling is incomplete.
+		 * TSR cannot be read by the guest, and its value in
+		 * vcpu->arch is always zero. For now, just handle
+		 * the case where the caller is trying to inject a
+		 * decrementer interrupt.
+		 */
+
+		if ((sregs->u.e.tsr & TSR_DIS) &&
+		    (vcpu->arch.tcr & TCR_DIE))
+			kvmppc_core_queue_dec(vcpu);
+	}
+
+	return 0;
+}
+
+static void get_sregs_arch206(struct kvm_vcpu *vcpu,
+                              struct kvm_sregs *sregs)
+{
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206;
+
+	sregs->u.e.pir = 0;
+	sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
+	sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
+	sregs->u.e.decar = vcpu->arch.decar;
+	sregs->u.e.ivpr = vcpu->arch.ivpr;
+}
+
+static int set_sregs_arch206(struct kvm_vcpu *vcpu,
+                             struct kvm_sregs *sregs)
+{
+	if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
+		return 0;
+
+	if (sregs->u.e.pir != 0)
+		return -EINVAL;
+
+	vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
+	vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
+	vcpu->arch.decar = sregs->u.e.decar;
+	vcpu->arch.ivpr = sregs->u.e.ivpr;
+
+	return 0;
+}
+
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	sregs->u.e.features |= KVM_SREGS_E_IVOR;
+
+	sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+	sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+	sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+	sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+	sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+	sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+	sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+	sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+	sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+	sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+	sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+	sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+	sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+	sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+	sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+	sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+}
+
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;
+
+	vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                   struct kvm_sregs *sregs)
 {
-	return -ENOTSUPP;
+	sregs->pvr = vcpu->arch.pvr;
+
+	get_sregs_base(vcpu, sregs);
+	get_sregs_arch206(vcpu, sregs);
+	kvmppc_core_get_sregs(vcpu, sregs);
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                   struct kvm_sregs *sregs)
 {
-	return -ENOTSUPP;
+	int ret;
+
+	if (vcpu->arch.pvr != sregs->pvr)
+		return -EINVAL;
+
+	ret = set_sregs_base(vcpu, sregs);
+	if (ret < 0)
+		return ret;
+
+	ret = set_sregs_arch206(vcpu, sregs);
+	if (ret < 0)
+		return ret;
+
+	return kvmppc_core_set_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
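
The TSR branch in set_sregs_base() above gives userspace a path for
injecting a decrementer interrupt. A hypothetical helper, reusing the
includes from the earlier sketches (TSR_DIS is the Book E decrementer
interrupt status bit, which is not exported to userspace headers; the
guest must also have TCR[DIE] set for the interrupt to be queued):

	static int inject_dec_interrupt(int vcpu_fd)
	{
		struct kvm_sregs sregs;

		if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
			return -1;

		sregs.u.e.features = KVM_SREGS_E_BASE;
		sregs.u.e.update_special = KVM_SREGS_E_UPDATE_TSR;
		sregs.u.e.tsr |= TSR_DIS;

		return ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
	}
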
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 1cc471faac2d..b58ccae95904 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -380,7 +380,6 @@ lightweight_exit:
 	 * because host interrupt handlers would get confused. */
 	lwz	r1, VCPU_GPR(r1)(r4)
 
-	/* XXX handle USPRG0 */
 	/* Host interrupt handlers may have clobbered these guest-readable
 	 * SPRGs, so we need to reload them here with the guest's values. */
 	lwz	r3, VCPU_SPRG4(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e3768ee9b595..318dbc61ba44 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	/* Registers init */
 	vcpu->arch.pvr = mfspr(SPRN_PVR);
+	vcpu_e500->svr = mfspr(SPRN_SVR);
 
 	/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
 	vcpu->vcpu_id = 0;
@@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
+			       KVM_SREGS_E_PM;
+	sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+	sregs->u.e.impl.fsl.features = 0;
+	sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+	sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+	sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+	sregs->u.e.mas0 = vcpu_e500->mas0;
+	sregs->u.e.mas1 = vcpu_e500->mas1;
+	sregs->u.e.mas2 = vcpu_e500->mas2;
+	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+	sregs->u.e.mas4 = vcpu_e500->mas4;
+	sregs->u.e.mas6 = vcpu_e500->mas6;
+
+	sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
+	sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
+	sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
+	sregs->u.e.tlbcfg[2] = 0;
+	sregs->u.e.tlbcfg[3] = 0;
+
+	sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+	sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+	sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+	sregs->u.e.ivor_high[3] =
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+
+	kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+		vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+		vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+		vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+		vcpu_e500->mas0 = sregs->u.e.mas0;
+		vcpu_e500->mas1 = sregs->u.e.mas1;
+		vcpu_e500->mas2 = sregs->u.e.mas2;
+		vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
+		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+		vcpu_e500->mas4 = sregs->u.e.mas4;
+		vcpu_e500->mas6 = sregs->u.e.mas6;
+	}
+
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;
+
+	if (sregs->u.e.features & KVM_SREGS_E_SPE) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
+			sregs->u.e.ivor_high[0];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
+			sregs->u.e.ivor_high[1];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
+			sregs->u.e.ivor_high[2];
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_PM) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+			sregs->u.e.ivor_high[3];
+	}
+
+	return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;
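
MAS7 and MAS3 travel through the new sregs API as the single 64-bit
mas7_3 field; the convention used above is simply high word = MAS7 (the
upper RPN bits), low word = MAS3. A sketch of the pack/unpack pair, not
code from the patch:

	static inline u64 pack_mas7_3(u32 mas7, u32 mas3)
	{
		/* MAS7 carries the upper physical-address bits. */
		return ((u64)mas7 << 32) | mas3;
	}

	static inline void unpack_mas7_3(u64 mas7_3, u32 *mas7, u32 *mas3)
	{
		*mas7 = mas7_3 >> 32;
		*mas3 = (u32)mas7_3;
	}
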
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8e3edfbc9634..69cd665a0caf 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Yu Liu, <yu.liu@freescale.com>
  *
@@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 
 	switch (sprn) {
 	case SPRN_PID:
-		vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
-			vcpu->arch.pid = spr_val;
+		kvmppc_set_pid(vcpu, spr_val);
 		break;
 	case SPRN_PID1:
 		vcpu_e500->pid[1] = spr_val; break;
@@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
 	case SPRN_HID1:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
+	case SPRN_SVR:
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
 
 	case SPRN_MMUCSR0:
 		kvmppc_set_gpr(vcpu, rt, 0); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d6d6d47a75a9..b18fe353397d 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Yu Liu, yu.liu@freescale.com
  *
@@ -24,6 +24,7 @@
 #include "../mm/mmu_decl.h"
 #include "e500_tlb.h"
 #include "trace.h"
+#include "timing.h"
 
 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
 
@@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 		vcpu_e500->mas7 = 0;
 	}
 
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
 	return EMULATE_DONE;
 }
 
@@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 		write_host_tlbe(vcpu_e500, stlbsel, sesel);
 	}
 
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
 	return EMULATE_DONE;
 }
 
@@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
 	return -1;
 }
 
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+		vcpu->arch.pid = pid;
+}
+
 void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	struct tlbe *tlbe;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c64fd2909bb2..141dce3c6810 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	}
 }
 
+u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+{
+	u64 jd = tb - vcpu->arch.dec_jiffies;
+	return vcpu->arch.dec - jd;
+}
+
 /* XXX to do:
  * lhax
  * lhaux
@@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 		case SPRN_DEC:
 		{
-			u64 jd = get_tb() - vcpu->arch.dec_jiffies;
-			kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
-			pr_debug("mfDEC: %x - %llx = %lx\n",
-				 vcpu->arch.dec, jd,
-				 kvmppc_get_gpr(vcpu, rt));
+			kvmppc_set_gpr(vcpu, rt,
+				       kvmppc_get_dec(vcpu, get_tb()));
 			break;
 		}
 		default:
@@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			}
 			break;
 		}
+		kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
 		break;
 
 	case OP_31_XOP_STHX:
@@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			printk("mtspr: unknown spr %x\n", sprn);
 			break;
 		}
+		kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
 		break;
 
 	case OP_31_XOP_DCBI:
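
The arithmetic behind kvmppc_get_dec() above: arch.dec_jiffies records
the timebase (TB) at the moment DEC was last loaded, so the value the
guest should observe is the loaded value minus the TB ticks elapsed
since. Schematically (an illustration, not code from the patch):

	static u32 dec_remaining(u32 dec_loaded, u64 tb_at_load, u64 tb_now)
	{
		/* DEC counts down at the timebase rate; unsigned
		 * wrap-around mirrors the real register rolling
		 * through zero. */
		return dec_loaded - (u32)(tb_now - tb_at_load);
	}
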
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 99758460efde..616dd516ca1f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 	int r;
 
 	switch (ext) {
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_SREGS:
+#else
 	case KVM_CAP_PPC_SEGSTATE:
+#endif
 	case KVM_CAP_PPC_PAIRED_SINGLES:
 	case KVM_CAP_PPC_UNSET_IRQ:
 	case KVM_CAP_PPC_IRQ_LEVEL:
@@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+
 	return 0;
 }
 
@@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+#ifdef CONFIG_BOOKE
+	/*
+	 * vrsave (formerly usprg0) isn't used by Linux, but may
+	 * be used by the guest.
+	 *
+	 * On non-booke this is associated with Altivec and
+	 * is handled by code in book3s.c.
+	 */
+	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
 	kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	kvmppc_core_vcpu_put(vcpu);
+#ifdef CONFIG_BOOKE
+	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index a021f5827a33..319177df9587 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
 {
 	int i;
 
-	/* pause guest execution to avoid concurrent updates */
-	mutex_lock(&vcpu->mutex);
+	/* Take a lock to avoid concurrent updates */
+	mutex_lock(&vcpu->arch.exit_timing_lock);
 
 	vcpu->arch.last_exit_type = 0xDEAD;
 	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
 	vcpu->arch.timing_exit.tv64 = 0;
 	vcpu->arch.timing_last_enter.tv64 = 0;
 
-	mutex_unlock(&vcpu->mutex);
+	mutex_unlock(&vcpu->arch.exit_timing_lock);
 }
 
 static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
@@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
 		return;
 	}
 
+	mutex_lock(&vcpu->arch.exit_timing_lock);
+
 	vcpu->arch.timing_count_type[type]++;
 
 	/* sum */
@@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
 		vcpu->arch.timing_min_duration[type] = duration;
 	if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
 		vcpu->arch.timing_max_duration[type] = duration;
+
+	mutex_unlock(&vcpu->arch.exit_timing_lock);
 }
 
 void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
@@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
 {
 	struct kvm_vcpu *vcpu = m->private;
 	int i;
+	u64 min, max, sum, sum_quad;
 
 	seq_printf(m, "%s", "type	count	min	max	sum	sum_squared\n");
 
+
 	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+
+		min = vcpu->arch.timing_min_duration[i];
+		do_div(min, tb_ticks_per_usec);
+		max = vcpu->arch.timing_max_duration[i];
+		do_div(max, tb_ticks_per_usec);
+		sum = vcpu->arch.timing_sum_duration[i];
+		do_div(sum, tb_ticks_per_usec);
+		sum_quad = vcpu->arch.timing_sum_quad_duration[i];
+		do_div(sum_quad, tb_ticks_per_usec);
+
 		seq_printf(m, "%12s	%10d	%10lld	%10lld	%20lld	%20lld\n",
 			kvm_exit_names[i],
 			vcpu->arch.timing_count_type[i],
-			vcpu->arch.timing_min_duration[i],
-			vcpu->arch.timing_max_duration[i],
-			vcpu->arch.timing_sum_duration[i],
-			vcpu->arch.timing_sum_quad_duration[i]);
+			min,
+			max,
+			sum,
+			sum_quad);
+
 	}
 	return 0;
 }
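
The copies into local u64 variables above are forced by the do_div()
contract: it divides its first argument in place and returns the
remainder, and on 32-bit powerpc it is the standard way to divide a u64
without pulling in libgcc helpers. Roughly (a sketch, not from the
patch):

	#include <asm/div64.h>

	static u64 ticks_to_usecs(u64 ticks, u32 ticks_per_usec)
	{
		/* do_div() modifies "ticks" in place; dividing the
		 * array element directly would clobber the running
		 * statistics. */
		do_div(ticks, ticks_per_usec);
		return ticks;
	}
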
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f5213564326..0049211959c0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -14,6 +14,8 @@
 #include <asm/desc_defs.h>
 
 struct x86_emulate_ctxt;
+enum x86_intercept;
+enum x86_intercept_stage;
 
 struct x86_exception {
 	u8 vector;
@@ -24,6 +26,24 @@ struct x86_exception {
 };
 
 /*
+ * This struct is used to carry enough information from the instruction
+ * decoder to main KVM so that a decision can be made whether the
+ * instruction needs to be intercepted or not.
+ */
+struct x86_instruction_info {
+	u8  intercept;		/* which intercept */
+	u8  rep_prefix;		/* rep prefix? */
+	u8  modrm_mod;		/* mod part of modrm */
+	u8  modrm_reg;		/* index of register used */
+	u8  modrm_rm;		/* rm part of modrm */
+	u64 src_val;		/* value of source operand */
+	u8  src_bytes;		/* size of source operand */
+	u8  dst_bytes;		/* size of destination operand */
+	u8  ad_bytes;		/* size of src/dst address */
+	u64 next_rip;		/* rip following the instruction */
+};
+
+/*
  * x86_emulate_ops:
  *
  * These operations represent the instruction emulator's interface to memory.
@@ -62,6 +82,7 @@ struct x86_exception {
 #define X86EMUL_RETRY_INSTR	3 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED	4 /* cmpxchg did not see expected value */
 #define X86EMUL_IO_NEEDED	5 /* IO is needed to complete emulation */
+#define X86EMUL_INTERCEPTED	6 /* Intercepted by nested VMCB/VMCS */
 
 struct x86_emulate_ops {
 	/*
@@ -71,8 +92,9 @@ struct x86_emulate_ops {
 	 * @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*read_std)(unsigned long addr, void *val,
-			unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*read_std)(struct x86_emulate_ctxt *ctxt,
+			unsigned long addr, void *val,
+			unsigned int bytes,
 			struct x86_exception *fault);
 
 	/*
@@ -82,8 +104,8 @@ struct x86_emulate_ops {
 	 * @val:   [OUT] Value write to memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to write to memory.
 	 */
-	int (*write_std)(unsigned long addr, void *val,
-			 unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*write_std)(struct x86_emulate_ctxt *ctxt,
+			 unsigned long addr, void *val, unsigned int bytes,
			 struct x86_exception *fault);
 	/*
 	 * fetch: Read bytes of standard (non-emulated/special) memory.
@@ -92,8 +114,8 @@ struct x86_emulate_ops {
 	 * @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*fetch)(unsigned long addr, void *val,
-		     unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*fetch)(struct x86_emulate_ctxt *ctxt,
+		     unsigned long addr, void *val, unsigned int bytes,
		     struct x86_exception *fault);
 
 	/*
@@ -102,11 +124,9 @@ struct x86_emulate_ops {
 	 * @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*read_emulated)(unsigned long addr,
-			     void *val,
-			     unsigned int bytes,
-			     struct x86_exception *fault,
-			     struct kvm_vcpu *vcpu);
+	int (*read_emulated)(struct x86_emulate_ctxt *ctxt,
+			     unsigned long addr, void *val, unsigned int bytes,
+			     struct x86_exception *fault);
 
 	/*
 	 * write_emulated: Write bytes to emulated/special memory area.
@@ -115,11 +135,10 @@ struct x86_emulate_ops {
 	 *                required).
 	 * @bytes: [IN ] Number of bytes to write to memory.
 	 */
-	int (*write_emulated)(unsigned long addr,
-			      const void *val,
-			      unsigned int bytes,
-			      struct x86_exception *fault,
-			      struct kvm_vcpu *vcpu);
+	int (*write_emulated)(struct x86_emulate_ctxt *ctxt,
+			      unsigned long addr, const void *val,
+			      unsigned int bytes,
+			      struct x86_exception *fault);
 
 	/*
 	 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -129,40 +148,54 @@ struct x86_emulate_ops {
 	 * @new:   [IN ] Value to write to @addr.
 	 * @bytes: [IN ] Number of bytes to access using CMPXCHG.
 	 */
-	int (*cmpxchg_emulated)(unsigned long addr,
+	int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt,
+				unsigned long addr,
 				const void *old,
 				const void *new,
 				unsigned int bytes,
-				struct x86_exception *fault,
-				struct kvm_vcpu *vcpu);
+				struct x86_exception *fault);
+	void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr);
 
-	int (*pio_in_emulated)(int size, unsigned short port, void *val,
-			       unsigned int count, struct kvm_vcpu *vcpu);
-
-	int (*pio_out_emulated)(int size, unsigned short port, const void *val,
-				unsigned int count, struct kvm_vcpu *vcpu);
-
-	bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3,
-				      int seg, struct kvm_vcpu *vcpu);
-	void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3,
-				      int seg, struct kvm_vcpu *vcpu);
-	u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
-	void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
-	unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);
-	void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
-	void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
-	ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
-	int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
-	int (*cpl)(struct kvm_vcpu *vcpu);
-	int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu);
-	int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
-	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
-	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+	int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt,
+			       int size, unsigned short port, void *val,
+			       unsigned int count);
+
+	int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt,
+				int size, unsigned short port, const void *val,
+				unsigned int count);
+
+	bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector,
+			    struct desc_struct *desc, u32 *base3, int seg);
+	void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector,
+			    struct desc_struct *desc, u32 base3, int seg);
+	unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt,
+						 int seg);
+	void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
+	int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
+	int (*cpl)(struct x86_emulate_ctxt *ctxt);
+	int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
+	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
+	int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
+	int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
+	void (*halt)(struct x86_emulate_ctxt *ctxt);
+	void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
+	int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
+	void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
+	void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
+	int (*intercept)(struct x86_emulate_ctxt *ctxt,
+			 struct x86_instruction_info *info,
+			 enum x86_intercept_stage stage);
 };
 
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
 	unsigned int bytes;
 	union {
 		unsigned long orig_val;
@@ -174,11 +207,13 @@ struct operand {
 			ulong ea;
 			unsigned seg;
 		} mem;
+		unsigned xmm;
 	} addr;
 	union {
 		unsigned long val;
 		u64 val64;
 		char valptr[sizeof(unsigned long) + 2];
+		sse128_t vec_val;
 	};
 };
 
@@ -197,6 +232,7 @@ struct read_cache {
 struct decode_cache {
 	u8 twobyte;
 	u8 b;
+	u8 intercept;
 	u8 lock_prefix;
 	u8 rep_prefix;
 	u8 op_bytes;
@@ -209,6 +245,7 @@ struct decode_cache {
 	u8 seg_override;
 	unsigned int d;
 	int (*execute)(struct x86_emulate_ctxt *ctxt);
+	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 	unsigned long regs[NR_VCPU_REGS];
 	unsigned long eip;
 	/* modrm */
@@ -227,17 +264,15 @@ struct x86_emulate_ctxt {
 	struct x86_emulate_ops *ops;
 
 	/* Register state before/after emulation. */
-	struct kvm_vcpu *vcpu;
-
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
-	u32 cs_base;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
 
+	bool guest_mode; /* guest running a nested guest */
 	bool perm_ok; /* do not check permissions if true */
 	bool only_vendor_specific_insn;
 
@@ -249,8 +284,8 @@ struct x86_emulate_ctxt {
 };
 
 /* Repeat String Operation Prefix */
-#define REPE_PREFIX	1
-#define REPNE_PREFIX	2
+#define REPE_PREFIX	0xf3
+#define REPNE_PREFIX	0xf2
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode. */
@@ -259,6 +294,69 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
 #define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode. */
 
+/* any protected mode */
+#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
+			       X86EMUL_MODE_PROT64)
+
+enum x86_intercept_stage {
+	X86_ICTP_NONE = 0,	/* Allow zero-init to not match anything */
+	X86_ICPT_PRE_EXCEPT,
+	X86_ICPT_POST_EXCEPT,
+	X86_ICPT_POST_MEMACCESS,
+};
+
+enum x86_intercept {
+	x86_intercept_none,
+	x86_intercept_cr_read,
+	x86_intercept_cr_write,
+	x86_intercept_clts,
+	x86_intercept_lmsw,
+	x86_intercept_smsw,
+	x86_intercept_dr_read,
+	x86_intercept_dr_write,
+	x86_intercept_lidt,
+	x86_intercept_sidt,
+	x86_intercept_lgdt,
+	x86_intercept_sgdt,
+	x86_intercept_lldt,
+	x86_intercept_sldt,
+	x86_intercept_ltr,
+	x86_intercept_str,
+	x86_intercept_rdtsc,
+	x86_intercept_rdpmc,
+	x86_intercept_pushf,
+	x86_intercept_popf,
+	x86_intercept_cpuid,
+	x86_intercept_rsm,
+	x86_intercept_iret,
+	x86_intercept_intn,
+	x86_intercept_invd,
+	x86_intercept_pause,
+	x86_intercept_hlt,
+	x86_intercept_invlpg,
+	x86_intercept_invlpga,
+	x86_intercept_vmrun,
+	x86_intercept_vmload,
+	x86_intercept_vmsave,
+	x86_intercept_vmmcall,
+	x86_intercept_stgi,
+	x86_intercept_clgi,
+	x86_intercept_skinit,
+	x86_intercept_rdtscp,
+	x86_intercept_icebp,
+	x86_intercept_wbinvd,
+	x86_intercept_monitor,
+	x86_intercept_mwait,
+	x86_intercept_rdmsr,
+	x86_intercept_wrmsr,
+	x86_intercept_in,
+	x86_intercept_ins,
+	x86_intercept_out,
+	x86_intercept_outs,
+
+	nr_x86_intercepts
+};
+
 /* Host execution mode. */
 #if defined(CONFIG_X86_32)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
@@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
+#define EMULATION_INTERCEPTED 2
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 			 u16 tss_selector, int reason,
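
With struct kvm_vcpu dropped from every callback signature, the x86.c
side of this series recovers the vcpu from the context instead, which
works because emulate_ctxt is embedded in struct kvm_vcpu_arch (see the
kvm_host.h hunks below). Roughly:

	static struct kvm_vcpu *emul_to_vcpu(struct x86_emulate_ctxt *ctxt)
	{
		/* The emulator context lives inside the vcpu, so
		 * container_of() walks back to the enclosing vcpu. */
		return container_of(ctxt, struct kvm_vcpu,
				    arch.emulate_ctxt);
	}
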
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c8af0991fdf0..d2ac8e2ee897 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -30,14 +30,30 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MMIO_SIZE 16
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
+#define CR0_RESERVED_BITS                                               \
+	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
 				  0xFFFFFF0000000000ULL)
+#define CR4_RESERVED_BITS                                               \
+	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
+			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
+			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+			  | X86_CR4_OSXSAVE \
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+
 
 #define INVALID_PAGE (~(hpa_t)0)
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
@@ -118,6 +134,9 @@ enum kvm_reg {
 enum kvm_reg_ex {
 	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
 	VCPU_EXREG_CR3,
+	VCPU_EXREG_RFLAGS,
+	VCPU_EXREG_CPL,
+	VCPU_EXREG_SEGMENTS,
 };
 
 enum {
@@ -256,7 +275,7 @@ struct kvm_mmu {
 			  struct kvm_mmu_page *sp);
 	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
 	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			   u64 *spte, const void *pte, unsigned long mmu_seq);
+			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
@@ -340,7 +359,6 @@ struct kvm_vcpu_arch {
 	struct fpu guest_fpu;
 	u64 xcr0;
 
-	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
 
@@ -367,18 +385,22 @@ struct kvm_vcpu_arch {
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
+	bool emulate_regs_need_sync_to_vcpu;
+	bool emulate_regs_need_sync_from_vcpu;
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
-	u64 last_host_tsc;
 	u64 last_guest_tsc;
 	u64 last_kernel_ns;
 	u64 last_tsc_nsec;
 	u64 last_tsc_write;
+	u32 virtual_tsc_khz;
 	bool tsc_catchup;
+	u32 tsc_catchup_mult;
+	s8 tsc_catchup_shift;
 
 	bool nmi_pending;
 	bool nmi_injected;
@@ -448,9 +470,6 @@ struct kvm_arch {
 	u64 last_tsc_nsec;
 	u64 last_tsc_offset;
 	u64 last_tsc_write;
-	u32 virtual_tsc_khz;
-	u32 virtual_tsc_mult;
-	s8 virtual_tsc_shift;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
@@ -502,6 +521,8 @@ struct kvm_vcpu_stat {
 	u32 nmi_injections;
 };
 
+struct x86_instruction_info;
+
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);  /* __init */
 	int (*disabled_by_bios)(void);     /* __init */
@@ -586,9 +607,17 @@ struct kvm_x86_ops {
 
 	bool (*has_wbinvd_exit)(void);
 
+	void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
+	u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
+
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+
+	int (*check_intercept)(struct kvm_vcpu *vcpu,
+			       struct x86_instruction_info *info,
+			       enum x86_intercept_stage stage);
+
 	const struct trace_print_flags *exit_reasons_str;
 };
 
@@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
+/* control of guest tsc rate supported? */
+extern bool kvm_has_tsc_control;
661/* minimum supported tsc_khz for guests */
662extern u32 kvm_min_guest_tsc_khz;
663/* maximum supported tsc_khz for guests */
664extern u32 kvm_max_guest_tsc_khz;
665
630enum emulation_result { 666enum emulation_result {
631 EMULATE_DONE, /* no further processing */ 667 EMULATE_DONE, /* no further processing */
632 EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ 668 EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
@@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
645 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); 681 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
646} 682}
647 683
648void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
649void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
650
651void kvm_enable_efer_bits(u64); 684void kvm_enable_efer_bits(u64);
652int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); 685int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
653int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 686int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
@@ -657,8 +690,6 @@ struct x86_emulate_ctxt;
657int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); 690int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
658void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 691void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
659int kvm_emulate_halt(struct kvm_vcpu *vcpu); 692int kvm_emulate_halt(struct kvm_vcpu *vcpu);
660int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
661int emulate_clts(struct kvm_vcpu *vcpu);
662int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); 693int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
663 694
664void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 695void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
721 752
722int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 753int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
723 754
724int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
725
726int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, 755int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
727 void *insn, int insn_len); 756 void *insn, int insn_len);
728void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); 757void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
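
The reshuffled TSC fields move per-guest rate control from struct kvm_arch into each vcpu, and the new set_tsc_khz/compute_tsc_offset hooks let the backend pick an offset so a newly created or migrated vcpu observes the TSC value it expects. A rough model with hypothetical helper names, using the simplification guest_tsc = host_tsc + offset (no scaling):

#include <stdint.h>
#include <stdio.h>

/* Simplified: guest_tsc = host_tsc + offset, so the offset that
 * makes the guest read 'target' right now is just the difference.
 * Hardware may also scale host_tsc first (see TSC_RATIO below). */
static uint64_t compute_tsc_offset(uint64_t host_tsc, uint64_t target_tsc)
{
	return target_tsc - host_tsc;
}

int main(void)
{
	uint64_t host = 123456789;
	/* a freshly reset vcpu usually wants to see TSC == 0 */
	printf("offset = %#llx\n",
	       (unsigned long long)compute_tsc_offset(host, 0));
	return 0;
}
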
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3cce71413d0b..485b4f1f079b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -118,6 +118,7 @@
118 complete list. */ 118 complete list. */
119 119
120#define MSR_AMD64_PATCH_LEVEL 0x0000008b 120#define MSR_AMD64_PATCH_LEVEL 0x0000008b
121#define MSR_AMD64_TSC_RATIO 0xc0000104
121#define MSR_AMD64_NB_CFG 0xc001001f 122#define MSR_AMD64_NB_CFG 0xc001001f
122#define MSR_AMD64_PATCH_LOADER 0xc0010020 123#define MSR_AMD64_PATCH_LOADER 0xc0010020
123#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 124#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
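
MSR_AMD64_TSC_RATIO backs the guest TSC rate control added above. Per AMD's documentation it holds a fixed-point multiplier (fractional part in the low 32 bits, integer part above, so 1ULL << 32 encodes a ratio of 1.0); a sketch of encoding a desired guest/host frequency ratio, assuming that format:

#include <stdint.h>
#include <stdio.h>

/* Assumed fixed-point encoding: ratio = guest_khz / host_khz,
 * scaled by 2^32; 1ULL << 32 means the guest runs at host speed. */
static uint64_t tsc_ratio(uint32_t guest_khz, uint32_t host_khz)
{
	return ((uint64_t)guest_khz << 32) / host_khz;
}

int main(void)
{
	/* e.g. a 2.0 GHz guest on a 3.0 GHz host -> ratio ~0.666 */
	printf("TSC_RATIO = %#llx\n",
	       (unsigned long long)tsc_ratio(2000000, 3000000));
	return 0;
}
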
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0ad47b819a8b..d6e2477feb18 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -73,9 +73,14 @@
73#define MemAbs (1<<11) /* Memory operand is absolute displacement */ 73#define MemAbs (1<<11) /* Memory operand is absolute displacement */
74#define String (1<<12) /* String instruction (rep capable) */ 74#define String (1<<12) /* String instruction (rep capable) */
75#define Stack (1<<13) /* Stack instruction (push/pop) */ 75#define Stack (1<<13) /* Stack instruction (push/pop) */
76#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */
76#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 77#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
77#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 78#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */
79#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */
80#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */
81#define Sse (1<<17) /* SSE Vector instruction */
78/* Misc flags */ 82/* Misc flags */
83#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
79#define VendorSpecific (1<<22) /* Vendor specific instruction */ 84#define VendorSpecific (1<<22) /* Vendor specific instruction */
80#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ 85#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
81#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ 86#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
@@ -102,11 +107,14 @@
102 107
103struct opcode { 108struct opcode {
104 u32 flags; 109 u32 flags;
110 u8 intercept;
105 union { 111 union {
106 int (*execute)(struct x86_emulate_ctxt *ctxt); 112 int (*execute)(struct x86_emulate_ctxt *ctxt);
107 struct opcode *group; 113 struct opcode *group;
108 struct group_dual *gdual; 114 struct group_dual *gdual;
115 struct gprefix *gprefix;
109 } u; 116 } u;
117 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
110}; 118};
111 119
112struct group_dual { 120struct group_dual {
@@ -114,6 +122,13 @@ struct group_dual {
114 struct opcode mod3[8]; 122 struct opcode mod3[8];
115}; 123};
116 124
125struct gprefix {
126 struct opcode pfx_no;
127 struct opcode pfx_66;
128 struct opcode pfx_f2;
129 struct opcode pfx_f3;
130};
131
117/* EFLAGS bit definitions. */ 132/* EFLAGS bit definitions. */
118#define EFLG_ID (1<<21) 133#define EFLG_ID (1<<21)
119#define EFLG_VIP (1<<20) 134#define EFLG_VIP (1<<20)
@@ -248,42 +263,42 @@ struct group_dual {
248 "w", "r", _LO32, "r", "", "r") 263 "w", "r", _LO32, "r", "", "r")
249 264
250/* Instruction has three operands and one operand is stored in ECX register */ 265/* Instruction has three operands and one operand is stored in ECX register */
251#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ 266#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
252 do { \ 267 do { \
253 unsigned long _tmp; \ 268 unsigned long _tmp; \
254 _type _clv = (_cl).val; \ 269 _type _clv = (_cl).val; \
255 _type _srcv = (_src).val; \ 270 _type _srcv = (_src).val; \
256 _type _dstv = (_dst).val; \ 271 _type _dstv = (_dst).val; \
257 \ 272 \
258 __asm__ __volatile__ ( \ 273 __asm__ __volatile__ ( \
259 _PRE_EFLAGS("0", "5", "2") \ 274 _PRE_EFLAGS("0", "5", "2") \
260 _op _suffix " %4,%1 \n" \ 275 _op _suffix " %4,%1 \n" \
261 _POST_EFLAGS("0", "5", "2") \ 276 _POST_EFLAGS("0", "5", "2") \
262 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ 277 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
263 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ 278 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
264 ); \ 279 ); \
265 \ 280 \
266 (_cl).val = (unsigned long) _clv; \ 281 (_cl).val = (unsigned long) _clv; \
267 (_src).val = (unsigned long) _srcv; \ 282 (_src).val = (unsigned long) _srcv; \
268 (_dst).val = (unsigned long) _dstv; \ 283 (_dst).val = (unsigned long) _dstv; \
269 } while (0) 284 } while (0)
270 285
271#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ 286#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
272 do { \ 287 do { \
273 switch ((_dst).bytes) { \ 288 switch ((_dst).bytes) { \
274 case 2: \ 289 case 2: \
275 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 290 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
276 "w", unsigned short); \ 291 "w", unsigned short); \
277 break; \ 292 break; \
278 case 4: \ 293 case 4: \
279 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 294 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
280 "l", unsigned int); \ 295 "l", unsigned int); \
281 break; \ 296 break; \
282 case 8: \ 297 case 8: \
283 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 298 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
284 "q", unsigned long)); \ 299 "q", unsigned long)); \
285 break; \ 300 break; \
286 } \ 301 } \
287 } while (0) 302 } while (0)
288 303
289#define __emulate_1op(_op, _dst, _eflags, _suffix) \ 304#define __emulate_1op(_op, _dst, _eflags, _suffix) \
@@ -346,13 +361,25 @@ struct group_dual {
346 } while (0) 361 } while (0)
347 362
348/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ 363/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
349#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ 364#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \
350 do { \ 365 do { \
351 switch((_src).bytes) { \ 366 switch((_src).bytes) { \
352 case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ 367 case 1: \
353 case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ 368 __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
354 case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ 369 _eflags, "b"); \
355 case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ 370 break; \
371 case 2: \
372 __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
373 _eflags, "w"); \
374 break; \
375 case 4: \
376 __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
377 _eflags, "l"); \
378 break; \
379 case 8: \
380 ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
381 _eflags, "q")); \
382 break; \
356 } \ 383 } \
357 } while (0) 384 } while (0)
358 385
@@ -388,13 +415,33 @@ struct group_dual {
388 (_type)_x; \ 415 (_type)_x; \
389}) 416})
390 417
391#define insn_fetch_arr(_arr, _size, _eip) \ 418#define insn_fetch_arr(_arr, _size, _eip) \
392({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ 419({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \
393 if (rc != X86EMUL_CONTINUE) \ 420 if (rc != X86EMUL_CONTINUE) \
394 goto done; \ 421 goto done; \
395 (_eip) += (_size); \ 422 (_eip) += (_size); \
396}) 423})
397 424
425static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
426 enum x86_intercept intercept,
427 enum x86_intercept_stage stage)
428{
429 struct x86_instruction_info info = {
430 .intercept = intercept,
431 .rep_prefix = ctxt->decode.rep_prefix,
432 .modrm_mod = ctxt->decode.modrm_mod,
433 .modrm_reg = ctxt->decode.modrm_reg,
434 .modrm_rm = ctxt->decode.modrm_rm,
435 .src_val = ctxt->decode.src.val64,
436 .src_bytes = ctxt->decode.src.bytes,
437 .dst_bytes = ctxt->decode.dst.bytes,
438 .ad_bytes = ctxt->decode.ad_bytes,
439 .next_rip = ctxt->eip,
440 };
441
442 return ctxt->ops->intercept(ctxt, &info, stage);
443}
444
398static inline unsigned long ad_mask(struct decode_cache *c) 445static inline unsigned long ad_mask(struct decode_cache *c)
399{ 446{
400 return (1UL << (c->ad_bytes << 3)) - 1; 447 return (1UL << (c->ad_bytes << 3)) - 1;
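
emulator_check_intercept() above snapshots the decoded instruction into an x86_instruction_info and hands it to ctxt->ops->intercept(), so the backend (SVM's nested code) can veto the instruction at a given stage. A toy version of that callback shape, with hypothetical stage and field names:

#include <stdio.h>

/* Hypothetical stage names; the real enum is in kvm_emulate.h. */
enum stage { STAGE_PRE_EXCEPT, STAGE_POST_EXCEPT, STAGE_POST_MEMACCESS };

struct insn_info {
	int intercept;   /* which instruction class fired */
	enum stage stage;
};

/* Backend hook: nonzero means the nested hypervisor wants control. */
static int intercept_cb(const struct insn_info *info)
{
	return info->stage == STAGE_PRE_EXCEPT;
}

int main(void)
{
	struct insn_info info = { .intercept = 1, .stage = STAGE_PRE_EXCEPT };
	printf("intercepted: %d\n", intercept_cb(&info));
	return 0;
}
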
@@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel)
430 register_address_increment(c, &c->eip, rel); 477 register_address_increment(c, &c->eip, rel);
431} 478}
432 479
480static u32 desc_limit_scaled(struct desc_struct *desc)
481{
482 u32 limit = get_desc_limit(desc);
483
484 return desc->g ? (limit << 12) | 0xfff : limit;
485}
486
433static void set_seg_override(struct decode_cache *c, int seg) 487static void set_seg_override(struct decode_cache *c, int seg)
434{ 488{
435 c->has_seg_override = true; 489 c->has_seg_override = true;
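
desc_limit_scaled() above applies the descriptor's granularity bit: with G set, the 20-bit raw limit counts 4 KiB pages, so the byte-granular limit becomes (limit << 12) | 0xfff. A standalone illustration, not kernel code:

#include <stdint.h>
#include <stdio.h>

static uint32_t limit_scaled(uint32_t raw_limit, int g)
{
	return g ? (raw_limit << 12) | 0xfff : raw_limit;
}

int main(void)
{
	/* raw limit 0xfffff with G=1 covers the full 4 GiB - 1 */
	printf("%#x\n", limit_scaled(0xfffff, 1));  /* 0xffffffff */
	printf("%#x\n", limit_scaled(0xfffff, 0));  /* 0xfffff */
	return 0;
}
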
@@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
442 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) 496 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
443 return 0; 497 return 0;
444 498
445 return ops->get_cached_segment_base(seg, ctxt->vcpu); 499 return ops->get_cached_segment_base(ctxt, seg);
446} 500}
447 501
448static unsigned seg_override(struct x86_emulate_ctxt *ctxt, 502static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
449 struct x86_emulate_ops *ops,
450 struct decode_cache *c) 503 struct decode_cache *c)
451{ 504{
452 if (!c->has_seg_override) 505 if (!c->has_seg_override)
@@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
455 return c->seg_override; 508 return c->seg_override;
456} 509}
457 510
458static ulong linear(struct x86_emulate_ctxt *ctxt,
459 struct segmented_address addr)
460{
461 struct decode_cache *c = &ctxt->decode;
462 ulong la;
463
464 la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
465 if (c->ad_bytes != 8)
466 la &= (u32)-1;
467 return la;
468}
469
470static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, 511static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
471 u32 error, bool valid) 512 u32 error, bool valid)
472{ 513{
@@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
476 return X86EMUL_PROPAGATE_FAULT; 517 return X86EMUL_PROPAGATE_FAULT;
477} 518}
478 519
520static int emulate_db(struct x86_emulate_ctxt *ctxt)
521{
522 return emulate_exception(ctxt, DB_VECTOR, 0, false);
523}
524
479static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) 525static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
480{ 526{
481 return emulate_exception(ctxt, GP_VECTOR, err, true); 527 return emulate_exception(ctxt, GP_VECTOR, err, true);
482} 528}
483 529
530static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
531{
532 return emulate_exception(ctxt, SS_VECTOR, err, true);
533}
534
484static int emulate_ud(struct x86_emulate_ctxt *ctxt) 535static int emulate_ud(struct x86_emulate_ctxt *ctxt)
485{ 536{
486 return emulate_exception(ctxt, UD_VECTOR, 0, false); 537 return emulate_exception(ctxt, UD_VECTOR, 0, false);
@@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
496 return emulate_exception(ctxt, DE_VECTOR, 0, false); 547 return emulate_exception(ctxt, DE_VECTOR, 0, false);
497} 548}
498 549
550static int emulate_nm(struct x86_emulate_ctxt *ctxt)
551{
552 return emulate_exception(ctxt, NM_VECTOR, 0, false);
553}
554
555static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
556{
557 u16 selector;
558 struct desc_struct desc;
559
560 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
561 return selector;
562}
563
564static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
565 unsigned seg)
566{
567 u16 dummy;
568 u32 base3;
569 struct desc_struct desc;
570
571 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
572 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
573}
574
575static int __linearize(struct x86_emulate_ctxt *ctxt,
576 struct segmented_address addr,
577 unsigned size, bool write, bool fetch,
578 ulong *linear)
579{
580 struct decode_cache *c = &ctxt->decode;
581 struct desc_struct desc;
582 bool usable;
583 ulong la;
584 u32 lim;
585 u16 sel;
586 unsigned cpl, rpl;
587
588 la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
589 switch (ctxt->mode) {
590 case X86EMUL_MODE_REAL:
591 break;
592 case X86EMUL_MODE_PROT64:
593 if (((signed long)la << 16) >> 16 != la)
594 return emulate_gp(ctxt, 0);
595 break;
596 default:
597 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
598 addr.seg);
599 if (!usable)
600 goto bad;
601 /* code segment or read-only data segment */
602 if (((desc.type & 8) || !(desc.type & 2)) && write)
603 goto bad;
604 /* unreadable code segment */
605 if (!fetch && (desc.type & 8) && !(desc.type & 2))
606 goto bad;
607 lim = desc_limit_scaled(&desc);
608 if ((desc.type & 8) || !(desc.type & 4)) {
609 /* expand-up segment */
610 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
611 goto bad;
612 } else {
613			/* expand-down segment */
614 if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
615 goto bad;
616 lim = desc.d ? 0xffffffff : 0xffff;
617 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
618 goto bad;
619 }
620 cpl = ctxt->ops->cpl(ctxt);
621 rpl = sel & 3;
622 cpl = max(cpl, rpl);
623 if (!(desc.type & 8)) {
624 /* data segment */
625 if (cpl > desc.dpl)
626 goto bad;
627 } else if ((desc.type & 8) && !(desc.type & 4)) {
628 /* nonconforming code segment */
629 if (cpl != desc.dpl)
630 goto bad;
631 } else if ((desc.type & 8) && (desc.type & 4)) {
632 /* conforming code segment */
633 if (cpl < desc.dpl)
634 goto bad;
635 }
636 break;
637 }
638 if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8)
639 la &= (u32)-1;
640 *linear = la;
641 return X86EMUL_CONTINUE;
642bad:
643 if (addr.seg == VCPU_SREG_SS)
644 return emulate_ss(ctxt, addr.seg);
645 else
646 return emulate_gp(ctxt, addr.seg);
647}
648
649static int linearize(struct x86_emulate_ctxt *ctxt,
650 struct segmented_address addr,
651 unsigned size, bool write,
652 ulong *linear)
653{
654 return __linearize(ctxt, addr, size, write, false, linear);
655}
656
657
658static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
659 struct segmented_address addr,
660 void *data,
661 unsigned size)
662{
663 int rc;
664 ulong linear;
665
666 rc = linearize(ctxt, addr, size, false, &linear);
667 if (rc != X86EMUL_CONTINUE)
668 return rc;
669 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
670}
671
499static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 672static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
500 struct x86_emulate_ops *ops, 673 struct x86_emulate_ops *ops,
501 unsigned long eip, u8 *dest) 674 unsigned long eip, u8 *dest)
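
The checks in __linearize() treat expand-up and expand-down data segments oppositely: expand-up offsets must lie at or below the scaled limit, expand-down offsets strictly above it, with the upper bound set by the segment's D/B bit. A self-contained restatement of just that limit logic:

#include <stdint.h>
#include <stdio.h>

/* Returns 1 when an access of 'size' bytes at offset 'ea' fits the
 * segment; 'd' is the D/B bit. Illustrative, not the kernel code. */
static int in_limit(uint32_t ea, unsigned size, uint32_t lim,
		    int expand_down, int d)
{
	if (!expand_down)
		return ea <= lim && (uint32_t)(ea + size - 1) <= lim;
	/* expand-down: valid offsets lie strictly above the limit,
	 * up to 0xffff (16-bit) or 0xffffffff (32-bit) */
	if (ea <= lim || (uint32_t)(ea + size - 1) <= lim)
		return 0;
	lim = d ? 0xffffffff : 0xffff;
	return ea <= lim && (uint32_t)(ea + size - 1) <= lim;
}

int main(void)
{
	printf("%d\n", in_limit(0x1000, 4, 0xfff, 1, 1)); /* 1: above limit */
	printf("%d\n", in_limit(0x0800, 4, 0xfff, 1, 1)); /* 0: below limit */
	return 0;
}
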
@@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
505 int size, cur_size; 678 int size, cur_size;
506 679
507 if (eip == fc->end) { 680 if (eip == fc->end) {
681 unsigned long linear;
682 struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip};
508 cur_size = fc->end - fc->start; 683 cur_size = fc->end - fc->start;
509 size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); 684 size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
510 rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, 685 rc = __linearize(ctxt, addr, size, false, true, &linear);
511 size, ctxt->vcpu, &ctxt->exception); 686 if (rc != X86EMUL_CONTINUE)
687 return rc;
688 rc = ops->fetch(ctxt, linear, fc->data + cur_size,
689 size, &ctxt->exception);
512 if (rc != X86EMUL_CONTINUE) 690 if (rc != X86EMUL_CONTINUE)
513 return rc; 691 return rc;
514 fc->end += size; 692 fc->end += size;
@@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs,
551} 729}
552 730
553static int read_descriptor(struct x86_emulate_ctxt *ctxt, 731static int read_descriptor(struct x86_emulate_ctxt *ctxt,
554 struct x86_emulate_ops *ops,
555 struct segmented_address addr, 732 struct segmented_address addr,
556 u16 *size, unsigned long *address, int op_bytes) 733 u16 *size, unsigned long *address, int op_bytes)
557{ 734{
@@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
560 if (op_bytes == 2) 737 if (op_bytes == 2)
561 op_bytes = 3; 738 op_bytes = 3;
562 *address = 0; 739 *address = 0;
563 rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, 740 rc = segmented_read_std(ctxt, addr, size, 2);
564 ctxt->vcpu, &ctxt->exception);
565 if (rc != X86EMUL_CONTINUE) 741 if (rc != X86EMUL_CONTINUE)
566 return rc; 742 return rc;
567 addr.ea += 2; 743 addr.ea += 2;
568 rc = ops->read_std(linear(ctxt, addr), address, op_bytes, 744 rc = segmented_read_std(ctxt, addr, address, op_bytes);
569 ctxt->vcpu, &ctxt->exception);
570 return rc; 745 return rc;
571} 746}
572 747
@@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op)
623 } 798 }
624} 799}
625 800
626static void decode_register_operand(struct operand *op, 801static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
802{
803 ctxt->ops->get_fpu(ctxt);
804 switch (reg) {
805 case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
806 case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
807 case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
808 case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
809 case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
810 case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
811 case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
812 case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
813#ifdef CONFIG_X86_64
814 case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
815 case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
816 case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
817 case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
818 case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
819 case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
820 case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
821 case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
822#endif
823 default: BUG();
824 }
825 ctxt->ops->put_fpu(ctxt);
826}
827
828static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
829 int reg)
830{
831 ctxt->ops->get_fpu(ctxt);
832 switch (reg) {
833 case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
834 case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
835 case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
836 case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
837 case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
838 case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
839 case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
840 case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
841#ifdef CONFIG_X86_64
842 case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
843 case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
844 case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
845 case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
846 case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
847 case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
848 case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
849 case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
850#endif
851 default: BUG();
852 }
853 ctxt->ops->put_fpu(ctxt);
854}
855
856static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
857 struct operand *op,
627 struct decode_cache *c, 858 struct decode_cache *c,
628 int inhibit_bytereg) 859 int inhibit_bytereg)
629{ 860{
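
read_sse_reg()/write_sse_reg() need one movdqu per case because inline asm cannot index an xmm register at run time; the register number must be baked into each instruction. A minimal user-space analogue for two registers (assumes SSE2 is available; the kernel instead brackets the access with get_fpu()/put_fpu() to own the FPU state):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t lo, hi; } sse128_t;

static void read_xmm(sse128_t *data, int reg)
{
	switch (reg) {  /* the register number must be a literal in the asm */
	case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
	}
}

int main(void)
{
	sse128_t v = { 0, 0 };
	read_xmm(&v, 0);
	printf("xmm0 = %016llx%016llx\n",
	       (unsigned long long)v.hi, (unsigned long long)v.lo);
	return 0;
}
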
@@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op,
632 863
633 if (!(c->d & ModRM)) 864 if (!(c->d & ModRM))
634 reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); 865 reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
866
867 if (c->d & Sse) {
868 op->type = OP_XMM;
869 op->bytes = 16;
870 op->addr.xmm = reg;
871 read_sse_reg(ctxt, &op->vec_val, reg);
872 return;
873 }
874
635 op->type = OP_REG; 875 op->type = OP_REG;
636 if ((c->d & ByteOp) && !inhibit_bytereg) { 876 if ((c->d & ByteOp) && !inhibit_bytereg) {
637 op->addr.reg = decode_register(reg, c->regs, highbyte_regs); 877 op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
@@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
671 op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 911 op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
672 op->addr.reg = decode_register(c->modrm_rm, 912 op->addr.reg = decode_register(c->modrm_rm,
673 c->regs, c->d & ByteOp); 913 c->regs, c->d & ByteOp);
914 if (c->d & Sse) {
915 op->type = OP_XMM;
916 op->bytes = 16;
917 op->addr.xmm = c->modrm_rm;
918 read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
919 return rc;
920 }
674 fetch_register_operand(op); 921 fetch_register_operand(op);
675 return rc; 922 return rc;
676 } 923 }
@@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
819 if (mc->pos < mc->end) 1066 if (mc->pos < mc->end)
820 goto read_cached; 1067 goto read_cached;
821 1068
822 rc = ops->read_emulated(addr, mc->data + mc->end, n, 1069 rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
823 &ctxt->exception, ctxt->vcpu); 1070 &ctxt->exception);
824 if (rc != X86EMUL_CONTINUE) 1071 if (rc != X86EMUL_CONTINUE)
825 return rc; 1072 return rc;
826 mc->end += n; 1073 mc->end += n;
@@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
834 return X86EMUL_CONTINUE; 1081 return X86EMUL_CONTINUE;
835} 1082}
836 1083
1084static int segmented_read(struct x86_emulate_ctxt *ctxt,
1085 struct segmented_address addr,
1086 void *data,
1087 unsigned size)
1088{
1089 int rc;
1090 ulong linear;
1091
1092 rc = linearize(ctxt, addr, size, false, &linear);
1093 if (rc != X86EMUL_CONTINUE)
1094 return rc;
1095 return read_emulated(ctxt, ctxt->ops, linear, data, size);
1096}
1097
1098static int segmented_write(struct x86_emulate_ctxt *ctxt,
1099 struct segmented_address addr,
1100 const void *data,
1101 unsigned size)
1102{
1103 int rc;
1104 ulong linear;
1105
1106 rc = linearize(ctxt, addr, size, true, &linear);
1107 if (rc != X86EMUL_CONTINUE)
1108 return rc;
1109 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1110 &ctxt->exception);
1111}
1112
1113static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1114 struct segmented_address addr,
1115 const void *orig_data, const void *data,
1116 unsigned size)
1117{
1118 int rc;
1119 ulong linear;
1120
1121 rc = linearize(ctxt, addr, size, true, &linear);
1122 if (rc != X86EMUL_CONTINUE)
1123 return rc;
1124 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1125 size, &ctxt->exception);
1126}
1127
837static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, 1128static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
838 struct x86_emulate_ops *ops, 1129 struct x86_emulate_ops *ops,
839 unsigned int size, unsigned short port, 1130 unsigned int size, unsigned short port,
@@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
854 if (n == 0) 1145 if (n == 0)
855 n = 1; 1146 n = 1;
856 rc->pos = rc->end = 0; 1147 rc->pos = rc->end = 0;
857 if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) 1148 if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n))
858 return 0; 1149 return 0;
859 rc->end = n * size; 1150 rc->end = n * size;
860 } 1151 }
@@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
864 return 1; 1155 return 1;
865} 1156}
866 1157
867static u32 desc_limit_scaled(struct desc_struct *desc)
868{
869 u32 limit = get_desc_limit(desc);
870
871 return desc->g ? (limit << 12) | 0xfff : limit;
872}
873
874static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, 1158static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
875 struct x86_emulate_ops *ops, 1159 struct x86_emulate_ops *ops,
876 u16 selector, struct desc_ptr *dt) 1160 u16 selector, struct desc_ptr *dt)
877{ 1161{
878 if (selector & 1 << 2) { 1162 if (selector & 1 << 2) {
879 struct desc_struct desc; 1163 struct desc_struct desc;
1164 u16 sel;
1165
880 memset (dt, 0, sizeof *dt); 1166 memset (dt, 0, sizeof *dt);
881 if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, 1167 if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
882 ctxt->vcpu))
883 return; 1168 return;
884 1169
885 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ 1170 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
886 dt->address = get_desc_base(&desc); 1171 dt->address = get_desc_base(&desc);
887 } else 1172 } else
888 ops->get_gdt(dt, ctxt->vcpu); 1173 ops->get_gdt(ctxt, dt);
889} 1174}
890 1175
891/* allowed just for 8-byte segments */ 1176/* allowed just for 8-byte segments */
@@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
903 if (dt.size < index * 8 + 7) 1188 if (dt.size < index * 8 + 7)
904 return emulate_gp(ctxt, selector & 0xfffc); 1189 return emulate_gp(ctxt, selector & 0xfffc);
905 addr = dt.address + index * 8; 1190 addr = dt.address + index * 8;
906 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, 1191 ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception);
907 &ctxt->exception);
908 1192
909 return ret; 1193 return ret;
910} 1194}
@@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
925 return emulate_gp(ctxt, selector & 0xfffc); 1209 return emulate_gp(ctxt, selector & 0xfffc);
926 1210
927 addr = dt.address + index * 8; 1211 addr = dt.address + index * 8;
928 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, 1212 ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception);
929 &ctxt->exception);
930 1213
931 return ret; 1214 return ret;
932} 1215}
@@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
986 1269
987 rpl = selector & 3; 1270 rpl = selector & 3;
988 dpl = seg_desc.dpl; 1271 dpl = seg_desc.dpl;
989 cpl = ops->cpl(ctxt->vcpu); 1272 cpl = ops->cpl(ctxt);
990 1273
991 switch (seg) { 1274 switch (seg) {
992 case VCPU_SREG_SS: 1275 case VCPU_SREG_SS:
@@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1042 return ret; 1325 return ret;
1043 } 1326 }
1044load: 1327load:
1045 ops->set_segment_selector(selector, seg, ctxt->vcpu); 1328 ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
1046 ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu);
1047 return X86EMUL_CONTINUE; 1329 return X86EMUL_CONTINUE;
1048exception: 1330exception:
1049 emulate_exception(ctxt, err_vec, err_code, true); 1331 emulate_exception(ctxt, err_vec, err_code, true);
@@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op)
1069 } 1351 }
1070} 1352}
1071 1353
1072static inline int writeback(struct x86_emulate_ctxt *ctxt, 1354static int writeback(struct x86_emulate_ctxt *ctxt)
1073 struct x86_emulate_ops *ops)
1074{ 1355{
1075 int rc; 1356 int rc;
1076 struct decode_cache *c = &ctxt->decode; 1357 struct decode_cache *c = &ctxt->decode;
@@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1081 break; 1362 break;
1082 case OP_MEM: 1363 case OP_MEM:
1083 if (c->lock_prefix) 1364 if (c->lock_prefix)
1084 rc = ops->cmpxchg_emulated( 1365 rc = segmented_cmpxchg(ctxt,
1085 linear(ctxt, c->dst.addr.mem), 1366 c->dst.addr.mem,
1086 &c->dst.orig_val, 1367 &c->dst.orig_val,
1087 &c->dst.val, 1368 &c->dst.val,
1088 c->dst.bytes, 1369 c->dst.bytes);
1089 &ctxt->exception,
1090 ctxt->vcpu);
1091 else 1370 else
1092 rc = ops->write_emulated( 1371 rc = segmented_write(ctxt,
1093 linear(ctxt, c->dst.addr.mem), 1372 c->dst.addr.mem,
1094 &c->dst.val, 1373 &c->dst.val,
1095 c->dst.bytes, 1374 c->dst.bytes);
1096 &ctxt->exception,
1097 ctxt->vcpu);
1098 if (rc != X86EMUL_CONTINUE) 1375 if (rc != X86EMUL_CONTINUE)
1099 return rc; 1376 return rc;
1100 break; 1377 break;
1378 case OP_XMM:
1379 write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
1380 break;
1101 case OP_NONE: 1381 case OP_NONE:
1102 /* no writeback */ 1382 /* no writeback */
1103 break; 1383 break;
@@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1107 return X86EMUL_CONTINUE; 1387 return X86EMUL_CONTINUE;
1108} 1388}
1109 1389
1110static inline void emulate_push(struct x86_emulate_ctxt *ctxt, 1390static int em_push(struct x86_emulate_ctxt *ctxt)
1111 struct x86_emulate_ops *ops)
1112{ 1391{
1113 struct decode_cache *c = &ctxt->decode; 1392 struct decode_cache *c = &ctxt->decode;
1393 struct segmented_address addr;
1114 1394
1115 c->dst.type = OP_MEM;
1116 c->dst.bytes = c->op_bytes;
1117 c->dst.val = c->src.val;
1118 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); 1395 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1119 c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); 1396 addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
1120 c->dst.addr.mem.seg = VCPU_SREG_SS; 1397 addr.seg = VCPU_SREG_SS;
1398
1399 /* Disable writeback. */
1400 c->dst.type = OP_NONE;
1401 return segmented_write(ctxt, addr, &c->src.val, c->op_bytes);
1121} 1402}
1122 1403
1123static int emulate_pop(struct x86_emulate_ctxt *ctxt, 1404static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1124 struct x86_emulate_ops *ops,
1125 void *dest, int len) 1405 void *dest, int len)
1126{ 1406{
1127 struct decode_cache *c = &ctxt->decode; 1407 struct decode_cache *c = &ctxt->decode;
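
The reworked em_push() no longer goes through the writeback stage: it decrements RSP itself, issues the stack store directly through segmented_write(), and marks the destination OP_NONE so writeback() skips it. The underlying stack-push arithmetic, in isolation:

#include <stdint.h>
#include <stdio.h>

/* Toy stack push: decrement the stack pointer by the operand size,
 * then store at the new SS:RSP. 'mem' stands in for guest memory. */
static void push(uint8_t *mem, uint64_t *rsp, uint64_t val, int op_bytes)
{
	*rsp -= op_bytes;
	for (int i = 0; i < op_bytes; i++)
		mem[*rsp + i] = val >> (8 * i);   /* little-endian */
}

int main(void)
{
	uint8_t stack[64] = { 0 };
	uint64_t rsp = 64;
	push(stack, &rsp, 0x1234, 2);
	printf("rsp=%llu top=%02x%02x\n", (unsigned long long)rsp,
	       stack[rsp + 1], stack[rsp]);
	return 0;
}
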
@@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1130 1410
1131 addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); 1411 addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
1132 addr.seg = VCPU_SREG_SS; 1412 addr.seg = VCPU_SREG_SS;
1133 rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); 1413 rc = segmented_read(ctxt, addr, dest, len);
1134 if (rc != X86EMUL_CONTINUE) 1414 if (rc != X86EMUL_CONTINUE)
1135 return rc; 1415 return rc;
1136 1416
@@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1138 return rc; 1418 return rc;
1139} 1419}
1140 1420
1421static int em_pop(struct x86_emulate_ctxt *ctxt)
1422{
1423 struct decode_cache *c = &ctxt->decode;
1424
1425 return emulate_pop(ctxt, &c->dst.val, c->op_bytes);
1426}
1427
1141static int emulate_popf(struct x86_emulate_ctxt *ctxt, 1428static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1142 struct x86_emulate_ops *ops, 1429 struct x86_emulate_ops *ops,
1143 void *dest, int len) 1430 void *dest, int len)
@@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1145 int rc; 1432 int rc;
1146 unsigned long val, change_mask; 1433 unsigned long val, change_mask;
1147 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1434 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1148 int cpl = ops->cpl(ctxt->vcpu); 1435 int cpl = ops->cpl(ctxt);
1149 1436
1150 rc = emulate_pop(ctxt, ops, &val, len); 1437 rc = emulate_pop(ctxt, &val, len);
1151 if (rc != X86EMUL_CONTINUE) 1438 if (rc != X86EMUL_CONTINUE)
1152 return rc; 1439 return rc;
1153 1440
@@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1179 return rc; 1466 return rc;
1180} 1467}
1181 1468
1182static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, 1469static int em_popf(struct x86_emulate_ctxt *ctxt)
1183 struct x86_emulate_ops *ops, int seg)
1184{ 1470{
1185 struct decode_cache *c = &ctxt->decode; 1471 struct decode_cache *c = &ctxt->decode;
1186 1472
1187 c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); 1473 c->dst.type = OP_REG;
1474 c->dst.addr.reg = &ctxt->eflags;
1475 c->dst.bytes = c->op_bytes;
1476 return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
1477}
1188 1478
1189 emulate_push(ctxt, ops); 1479static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
1480 struct x86_emulate_ops *ops, int seg)
1481{
1482 struct decode_cache *c = &ctxt->decode;
1483
1484 c->src.val = get_segment_selector(ctxt, seg);
1485
1486 return em_push(ctxt);
1190} 1487}
1191 1488
1192static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, 1489static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
@@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1196 unsigned long selector; 1493 unsigned long selector;
1197 int rc; 1494 int rc;
1198 1495
1199 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); 1496 rc = emulate_pop(ctxt, &selector, c->op_bytes);
1200 if (rc != X86EMUL_CONTINUE) 1497 if (rc != X86EMUL_CONTINUE)
1201 return rc; 1498 return rc;
1202 1499
@@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1204 return rc; 1501 return rc;
1205} 1502}
1206 1503
1207static int emulate_pusha(struct x86_emulate_ctxt *ctxt, 1504static int em_pusha(struct x86_emulate_ctxt *ctxt)
1208 struct x86_emulate_ops *ops)
1209{ 1505{
1210 struct decode_cache *c = &ctxt->decode; 1506 struct decode_cache *c = &ctxt->decode;
1211 unsigned long old_esp = c->regs[VCPU_REGS_RSP]; 1507 unsigned long old_esp = c->regs[VCPU_REGS_RSP];
@@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
1216 (reg == VCPU_REGS_RSP) ? 1512 (reg == VCPU_REGS_RSP) ?
1217 (c->src.val = old_esp) : (c->src.val = c->regs[reg]); 1513 (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
1218 1514
1219 emulate_push(ctxt, ops); 1515 rc = em_push(ctxt);
1220
1221 rc = writeback(ctxt, ops);
1222 if (rc != X86EMUL_CONTINUE) 1516 if (rc != X86EMUL_CONTINUE)
1223 return rc; 1517 return rc;
1224 1518
1225 ++reg; 1519 ++reg;
1226 } 1520 }
1227 1521
1228 /* Disable writeback. */
1229 c->dst.type = OP_NONE;
1230
1231 return rc; 1522 return rc;
1232} 1523}
1233 1524
1234static int emulate_popa(struct x86_emulate_ctxt *ctxt, 1525static int em_pushf(struct x86_emulate_ctxt *ctxt)
1235 struct x86_emulate_ops *ops) 1526{
1527 struct decode_cache *c = &ctxt->decode;
1528
1529 c->src.val = (unsigned long)ctxt->eflags;
1530 return em_push(ctxt);
1531}
1532
1533static int em_popa(struct x86_emulate_ctxt *ctxt)
1236{ 1534{
1237 struct decode_cache *c = &ctxt->decode; 1535 struct decode_cache *c = &ctxt->decode;
1238 int rc = X86EMUL_CONTINUE; 1536 int rc = X86EMUL_CONTINUE;
@@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1245 --reg; 1543 --reg;
1246 } 1544 }
1247 1545
1248 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); 1546 rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes);
1249 if (rc != X86EMUL_CONTINUE) 1547 if (rc != X86EMUL_CONTINUE)
1250 break; 1548 break;
1251 --reg; 1549 --reg;
@@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt,
1265 1563
1266 /* TODO: Add limit checks */ 1564 /* TODO: Add limit checks */
1267 c->src.val = ctxt->eflags; 1565 c->src.val = ctxt->eflags;
1268 emulate_push(ctxt, ops); 1566 rc = em_push(ctxt);
1269 rc = writeback(ctxt, ops);
1270 if (rc != X86EMUL_CONTINUE) 1567 if (rc != X86EMUL_CONTINUE)
1271 return rc; 1568 return rc;
1272 1569
1273 ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); 1570 ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
1274 1571
1275 c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 1572 c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
1276 emulate_push(ctxt, ops); 1573 rc = em_push(ctxt);
1277 rc = writeback(ctxt, ops);
1278 if (rc != X86EMUL_CONTINUE) 1574 if (rc != X86EMUL_CONTINUE)
1279 return rc; 1575 return rc;
1280 1576
1281 c->src.val = c->eip; 1577 c->src.val = c->eip;
1282 emulate_push(ctxt, ops); 1578 rc = em_push(ctxt);
1283 rc = writeback(ctxt, ops);
1284 if (rc != X86EMUL_CONTINUE) 1579 if (rc != X86EMUL_CONTINUE)
1285 return rc; 1580 return rc;
1286 1581
1287 c->dst.type = OP_NONE; 1582 ops->get_idt(ctxt, &dt);
1288
1289 ops->get_idt(&dt, ctxt->vcpu);
1290 1583
1291 eip_addr = dt.address + (irq << 2); 1584 eip_addr = dt.address + (irq << 2);
1292 cs_addr = dt.address + (irq << 2) + 2; 1585 cs_addr = dt.address + (irq << 2) + 2;
1293 1586
1294 rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); 1587 rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
1295 if (rc != X86EMUL_CONTINUE) 1588 if (rc != X86EMUL_CONTINUE)
1296 return rc; 1589 return rc;
1297 1590
1298 rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); 1591 rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
1299 if (rc != X86EMUL_CONTINUE) 1592 if (rc != X86EMUL_CONTINUE)
1300 return rc; 1593 return rc;
1301 1594
@@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
1339 1632
1340 /* TODO: Add stack limit check */ 1633 /* TODO: Add stack limit check */
1341 1634
1342 rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); 1635 rc = emulate_pop(ctxt, &temp_eip, c->op_bytes);
1343 1636
1344 if (rc != X86EMUL_CONTINUE) 1637 if (rc != X86EMUL_CONTINUE)
1345 return rc; 1638 return rc;
@@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
1347 if (temp_eip & ~0xffff) 1640 if (temp_eip & ~0xffff)
1348 return emulate_gp(ctxt, 0); 1641 return emulate_gp(ctxt, 0);
1349 1642
1350 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1643 rc = emulate_pop(ctxt, &cs, c->op_bytes);
1351 1644
1352 if (rc != X86EMUL_CONTINUE) 1645 if (rc != X86EMUL_CONTINUE)
1353 return rc; 1646 return rc;
1354 1647
1355 rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); 1648 rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes);
1356 1649
1357 if (rc != X86EMUL_CONTINUE) 1650 if (rc != X86EMUL_CONTINUE)
1358 return rc; 1651 return rc;
@@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
1394 } 1687 }
1395} 1688}
1396 1689
1397static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1690static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1398 struct x86_emulate_ops *ops) 1691{
1692 struct decode_cache *c = &ctxt->decode;
1693 int rc;
1694 unsigned short sel;
1695
1696 memcpy(&sel, c->src.valptr + c->op_bytes, 2);
1697
1698 rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS);
1699 if (rc != X86EMUL_CONTINUE)
1700 return rc;
1701
1702 c->eip = 0;
1703 memcpy(&c->eip, c->src.valptr, c->op_bytes);
1704 return X86EMUL_CONTINUE;
1705}
1706
1707static int em_grp1a(struct x86_emulate_ctxt *ctxt)
1399{ 1708{
1400 struct decode_cache *c = &ctxt->decode; 1709 struct decode_cache *c = &ctxt->decode;
1401 1710
1402 return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); 1711 return emulate_pop(ctxt, &c->dst.val, c->dst.bytes);
1403} 1712}
1404 1713
1405static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) 1714static int em_grp2(struct x86_emulate_ctxt *ctxt)
1406{ 1715{
1407 struct decode_cache *c = &ctxt->decode; 1716 struct decode_cache *c = &ctxt->decode;
1408 switch (c->modrm_reg) { 1717 switch (c->modrm_reg) {
@@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1429 emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); 1738 emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
1430 break; 1739 break;
1431 } 1740 }
1741 return X86EMUL_CONTINUE;
1432} 1742}
1433 1743
1434static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, 1744static int em_grp3(struct x86_emulate_ctxt *ctxt)
1435 struct x86_emulate_ops *ops)
1436{ 1745{
1437 struct decode_cache *c = &ctxt->decode; 1746 struct decode_cache *c = &ctxt->decode;
1438 unsigned long *rax = &c->regs[VCPU_REGS_RAX]; 1747 unsigned long *rax = &c->regs[VCPU_REGS_RAX];
@@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1471 return X86EMUL_CONTINUE; 1780 return X86EMUL_CONTINUE;
1472} 1781}
1473 1782
1474static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, 1783static int em_grp45(struct x86_emulate_ctxt *ctxt)
1475 struct x86_emulate_ops *ops)
1476{ 1784{
1477 struct decode_cache *c = &ctxt->decode; 1785 struct decode_cache *c = &ctxt->decode;
1786 int rc = X86EMUL_CONTINUE;
1478 1787
1479 switch (c->modrm_reg) { 1788 switch (c->modrm_reg) {
1480 case 0: /* inc */ 1789 case 0: /* inc */
@@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1488 old_eip = c->eip; 1797 old_eip = c->eip;
1489 c->eip = c->src.val; 1798 c->eip = c->src.val;
1490 c->src.val = old_eip; 1799 c->src.val = old_eip;
1491 emulate_push(ctxt, ops); 1800 rc = em_push(ctxt);
1492 break; 1801 break;
1493 } 1802 }
1494 case 4: /* jmp abs */ 1803 case 4: /* jmp abs */
1495 c->eip = c->src.val; 1804 c->eip = c->src.val;
1496 break; 1805 break;
1806 case 5: /* jmp far */
1807 rc = em_jmp_far(ctxt);
1808 break;
1497 case 6: /* push */ 1809 case 6: /* push */
1498 emulate_push(ctxt, ops); 1810 rc = em_push(ctxt);
1499 break; 1811 break;
1500 } 1812 }
1501 return X86EMUL_CONTINUE; 1813 return rc;
1502} 1814}
1503 1815
1504static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, 1816static int em_grp9(struct x86_emulate_ctxt *ctxt)
1505 struct x86_emulate_ops *ops)
1506{ 1817{
1507 struct decode_cache *c = &ctxt->decode; 1818 struct decode_cache *c = &ctxt->decode;
1508 u64 old = c->dst.orig_val64; 1819 u64 old = c->dst.orig_val64;
@@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1528 int rc; 1839 int rc;
1529 unsigned long cs; 1840 unsigned long cs;
1530 1841
1531 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); 1842 rc = emulate_pop(ctxt, &c->eip, c->op_bytes);
1532 if (rc != X86EMUL_CONTINUE) 1843 if (rc != X86EMUL_CONTINUE)
1533 return rc; 1844 return rc;
1534 if (c->op_bytes == 4) 1845 if (c->op_bytes == 4)
1535 c->eip = (u32)c->eip; 1846 c->eip = (u32)c->eip;
1536 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1847 rc = emulate_pop(ctxt, &cs, c->op_bytes);
1537 if (rc != X86EMUL_CONTINUE) 1848 if (rc != X86EMUL_CONTINUE)
1538 return rc; 1849 return rc;
1539 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); 1850 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
@@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1562 struct x86_emulate_ops *ops, struct desc_struct *cs, 1873 struct x86_emulate_ops *ops, struct desc_struct *cs,
1563 struct desc_struct *ss) 1874 struct desc_struct *ss)
1564{ 1875{
1876 u16 selector;
1877
1565 memset(cs, 0, sizeof(struct desc_struct)); 1878 memset(cs, 0, sizeof(struct desc_struct));
1566 ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); 1879 ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
1567 memset(ss, 0, sizeof(struct desc_struct)); 1880 memset(ss, 0, sizeof(struct desc_struct));
1568 1881
1569 cs->l = 0; /* will be adjusted later */ 1882 cs->l = 0; /* will be adjusted later */
@@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1593 struct desc_struct cs, ss; 1906 struct desc_struct cs, ss;
1594 u64 msr_data; 1907 u64 msr_data;
1595 u16 cs_sel, ss_sel; 1908 u16 cs_sel, ss_sel;
1909 u64 efer = 0;
1596 1910
1597 /* syscall is not available in real mode */ 1911 /* syscall is not available in real mode */
1598 if (ctxt->mode == X86EMUL_MODE_REAL || 1912 if (ctxt->mode == X86EMUL_MODE_REAL ||
1599 ctxt->mode == X86EMUL_MODE_VM86) 1913 ctxt->mode == X86EMUL_MODE_VM86)
1600 return emulate_ud(ctxt); 1914 return emulate_ud(ctxt);
1601 1915
1916 ops->get_msr(ctxt, MSR_EFER, &efer);
1602 setup_syscalls_segments(ctxt, ops, &cs, &ss); 1917 setup_syscalls_segments(ctxt, ops, &cs, &ss);
1603 1918
1604 ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 1919 ops->get_msr(ctxt, MSR_STAR, &msr_data);
1605 msr_data >>= 32; 1920 msr_data >>= 32;
1606 cs_sel = (u16)(msr_data & 0xfffc); 1921 cs_sel = (u16)(msr_data & 0xfffc);
1607 ss_sel = (u16)(msr_data + 8); 1922 ss_sel = (u16)(msr_data + 8);
1608 1923
1609 if (is_long_mode(ctxt->vcpu)) { 1924 if (efer & EFER_LMA) {
1610 cs.d = 0; 1925 cs.d = 0;
1611 cs.l = 1; 1926 cs.l = 1;
1612 } 1927 }
1613 ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); 1928 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
1614 ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 1929 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
1615 ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
1616 ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1617 1930
1618 c->regs[VCPU_REGS_RCX] = c->eip; 1931 c->regs[VCPU_REGS_RCX] = c->eip;
1619 if (is_long_mode(ctxt->vcpu)) { 1932 if (efer & EFER_LMA) {
1620#ifdef CONFIG_X86_64 1933#ifdef CONFIG_X86_64
1621 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; 1934 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1622 1935
1623 ops->get_msr(ctxt->vcpu, 1936 ops->get_msr(ctxt,
1624 ctxt->mode == X86EMUL_MODE_PROT64 ? 1937 ctxt->mode == X86EMUL_MODE_PROT64 ?
1625 MSR_LSTAR : MSR_CSTAR, &msr_data); 1938 MSR_LSTAR : MSR_CSTAR, &msr_data);
1626 c->eip = msr_data; 1939 c->eip = msr_data;
1627 1940
1628 ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); 1941 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
1629 ctxt->eflags &= ~(msr_data | EFLG_RF); 1942 ctxt->eflags &= ~(msr_data | EFLG_RF);
1630#endif 1943#endif
1631 } else { 1944 } else {
1632 /* legacy mode */ 1945 /* legacy mode */
1633 ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 1946 ops->get_msr(ctxt, MSR_STAR, &msr_data);
1634 c->eip = (u32)msr_data; 1947 c->eip = (u32)msr_data;
1635 1948
1636 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 1949 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
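
Note the pattern in this hunk: the emulator now fetches MSR_EFER through ctxt->ops->get_msr() and tests long mode itself, instead of reaching back into the vcpu via is_long_mode(). The bit test is just this (EFER.LMA is bit 10):

#include <stdint.h>
#include <stdio.h>

#define EFER_LMA (1ULL << 10)   /* long mode active */

static int long_mode_active(uint64_t efer)
{
	return !!(efer & EFER_LMA);
}

int main(void)
{
	printf("%d\n", long_mode_active(EFER_LMA | 1 /* SCE */));
	return 0;
}
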
@@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1646 struct desc_struct cs, ss; 1959 struct desc_struct cs, ss;
1647 u64 msr_data; 1960 u64 msr_data;
1648 u16 cs_sel, ss_sel; 1961 u16 cs_sel, ss_sel;
1962 u64 efer = 0;
1649 1963
1964 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1650 /* inject #GP if in real mode */ 1965 /* inject #GP if in real mode */
1651 if (ctxt->mode == X86EMUL_MODE_REAL) 1966 if (ctxt->mode == X86EMUL_MODE_REAL)
1652 return emulate_gp(ctxt, 0); 1967 return emulate_gp(ctxt, 0);
@@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1659 1974
1660 setup_syscalls_segments(ctxt, ops, &cs, &ss); 1975 setup_syscalls_segments(ctxt, ops, &cs, &ss);
1661 1976
1662 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 1977 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
1663 switch (ctxt->mode) { 1978 switch (ctxt->mode) {
1664 case X86EMUL_MODE_PROT32: 1979 case X86EMUL_MODE_PROT32:
1665 if ((msr_data & 0xfffc) == 0x0) 1980 if ((msr_data & 0xfffc) == 0x0)
@@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1676 cs_sel &= ~SELECTOR_RPL_MASK; 1991 cs_sel &= ~SELECTOR_RPL_MASK;
1677 ss_sel = cs_sel + 8; 1992 ss_sel = cs_sel + 8;
1678 ss_sel &= ~SELECTOR_RPL_MASK; 1993 ss_sel &= ~SELECTOR_RPL_MASK;
1679 if (ctxt->mode == X86EMUL_MODE_PROT64 1994 if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
1680 || is_long_mode(ctxt->vcpu)) {
1681 cs.d = 0; 1995 cs.d = 0;
1682 cs.l = 1; 1996 cs.l = 1;
1683 } 1997 }
1684 1998
1685 ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); 1999 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
1686 ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 2000 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
1687 ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
1688 ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1689 2001
1690 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); 2002 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
1691 c->eip = msr_data; 2003 c->eip = msr_data;
1692 2004
1693 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 2005 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
1694 c->regs[VCPU_REGS_RSP] = msr_data; 2006 c->regs[VCPU_REGS_RSP] = msr_data;
1695 2007
1696 return X86EMUL_CONTINUE; 2008 return X86EMUL_CONTINUE;
@@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1719 2031
1720 cs.dpl = 3; 2032 cs.dpl = 3;
1721 ss.dpl = 3; 2033 ss.dpl = 3;
1722 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 2034 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
1723 switch (usermode) { 2035 switch (usermode) {
1724 case X86EMUL_MODE_PROT32: 2036 case X86EMUL_MODE_PROT32:
1725 cs_sel = (u16)(msr_data + 16); 2037 cs_sel = (u16)(msr_data + 16);
@@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1739 cs_sel |= SELECTOR_RPL_MASK; 2051 cs_sel |= SELECTOR_RPL_MASK;
1740 ss_sel |= SELECTOR_RPL_MASK; 2052 ss_sel |= SELECTOR_RPL_MASK;
1741 2053
1742 ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); 2054 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
1743 ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 2055 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
1744 ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
1745 ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1746 2056
1747 c->eip = c->regs[VCPU_REGS_RDX]; 2057 c->eip = c->regs[VCPU_REGS_RDX];
1748 c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; 2058 c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];
@@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
1759 if (ctxt->mode == X86EMUL_MODE_VM86) 2069 if (ctxt->mode == X86EMUL_MODE_VM86)
1760 return true; 2070 return true;
1761 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 2071 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1762 return ops->cpl(ctxt->vcpu) > iopl; 2072 return ops->cpl(ctxt) > iopl;
1763} 2073}
1764 2074
1765static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, 2075static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
@@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1769 struct desc_struct tr_seg; 2079 struct desc_struct tr_seg;
1770 u32 base3; 2080 u32 base3;
1771 int r; 2081 int r;
1772 u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; 2082 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
1773 unsigned mask = (1 << len) - 1; 2083 unsigned mask = (1 << len) - 1;
1774 unsigned long base; 2084 unsigned long base;
1775 2085
1776 ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); 2086 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
1777 if (!tr_seg.p) 2087 if (!tr_seg.p)
1778 return false; 2088 return false;
1779 if (desc_limit_scaled(&tr_seg) < 103) 2089 if (desc_limit_scaled(&tr_seg) < 103)
@@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1782#ifdef CONFIG_X86_64 2092#ifdef CONFIG_X86_64
1783 base |= ((u64)base3) << 32; 2093 base |= ((u64)base3) << 32;
1784#endif 2094#endif
1785 r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); 2095 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
1786 if (r != X86EMUL_CONTINUE) 2096 if (r != X86EMUL_CONTINUE)
1787 return false; 2097 return false;
1788 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) 2098 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
1789 return false; 2099 return false;
1790 r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, 2100 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
1791 NULL);
1792 if (r != X86EMUL_CONTINUE) 2101 if (r != X86EMUL_CONTINUE)
1793 return false; 2102 return false;
1794 if ((perm >> bit_idx) & mask) 2103 if ((perm >> bit_idx) & mask)
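(Aside: the hunk above is the SDM's TSS-based I/O permission test. read_std fetches the two bitmap bytes that straddle the port into perm, and a set bit denies access. A minimal standalone sketch of the same bit arithmetic, in plain C with stdint types; io_allowed is an illustrative name, not the kernel's:

	#include <stdint.h>
	#include <stdbool.h>

	/* 'perm' holds the two I/O-bitmap bytes straddling 'port'; bit_idx
	 * selects the port's bit within them, and 'mask' covers one bit per
	 * byte of the access width. A set bit means access is denied. */
	static bool io_allowed(uint16_t perm, uint16_t port, unsigned int len)
	{
		uint16_t bit_idx = port & 0x7;
		unsigned int mask = (1 << len) - 1;

		return ((perm >> bit_idx) & mask) == 0;
	}

For example, io_allowed(0x0008, 3, 1) is false: bit 3 of the bitmap is set, so a one-byte access to port 3 faults, which matches the "return false" path above.)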
@@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
1829 tss->si = c->regs[VCPU_REGS_RSI]; 2138 tss->si = c->regs[VCPU_REGS_RSI];
1830 tss->di = c->regs[VCPU_REGS_RDI]; 2139 tss->di = c->regs[VCPU_REGS_RDI];
1831 2140
1832 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); 2141 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
1833 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 2142 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
1834 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); 2143 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
1835 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); 2144 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
1836 tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); 2145 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
1837} 2146}
1838 2147
1839static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, 2148static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
@@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
1858 * SDM says that segment selectors are loaded before segment 2167 * SDM says that segment selectors are loaded before segment
1859 * descriptors 2168 * descriptors
1860 */ 2169 */
1861 ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); 2170 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
1862 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); 2171 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
1863 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); 2172 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
1864 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); 2173 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
1865 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); 2174 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
1866 2175
1867 /* 2176 /*
1868 * Now load segment descriptors. If a fault happens at this stage 2177
@@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
1896 int ret; 2205 int ret;
1897 u32 new_tss_base = get_desc_base(new_desc); 2206 u32 new_tss_base = get_desc_base(new_desc);
1898 2207
1899 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2208 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
1900 &ctxt->exception); 2209 &ctxt->exception);
1901 if (ret != X86EMUL_CONTINUE) 2210 if (ret != X86EMUL_CONTINUE)
1902 /* FIXME: need to provide precise fault address */ 2211 /* FIXME: need to provide precise fault address */
@@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
1904 2213
1905 save_state_to_tss16(ctxt, ops, &tss_seg); 2214 save_state_to_tss16(ctxt, ops, &tss_seg);
1906 2215
1907 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2216 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
1908 &ctxt->exception); 2217 &ctxt->exception);
1909 if (ret != X86EMUL_CONTINUE) 2218 if (ret != X86EMUL_CONTINUE)
1910 /* FIXME: need to provide precise fault address */ 2219 /* FIXME: need to provide precise fault address */
1911 return ret; 2220 return ret;
1912 2221
1913 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2222 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
1914 &ctxt->exception); 2223 &ctxt->exception);
1915 if (ret != X86EMUL_CONTINUE) 2224 if (ret != X86EMUL_CONTINUE)
1916 /* FIXME: need to provide precise fault address */ 2225 /* FIXME: need to provide precise fault address */
@@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
1919 if (old_tss_sel != 0xffff) { 2228 if (old_tss_sel != 0xffff) {
1920 tss_seg.prev_task_link = old_tss_sel; 2229 tss_seg.prev_task_link = old_tss_sel;
1921 2230
1922 ret = ops->write_std(new_tss_base, 2231 ret = ops->write_std(ctxt, new_tss_base,
1923 &tss_seg.prev_task_link, 2232 &tss_seg.prev_task_link,
1924 sizeof tss_seg.prev_task_link, 2233 sizeof tss_seg.prev_task_link,
1925 ctxt->vcpu, &ctxt->exception); 2234 &ctxt->exception);
1926 if (ret != X86EMUL_CONTINUE) 2235 if (ret != X86EMUL_CONTINUE)
1927 /* FIXME: need to provide precise fault address */ 2236 /* FIXME: need to provide precise fault address */
1928 return ret; 2237 return ret;
@@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
1937{ 2246{
1938 struct decode_cache *c = &ctxt->decode; 2247 struct decode_cache *c = &ctxt->decode;
1939 2248
1940 tss->cr3 = ops->get_cr(3, ctxt->vcpu); 2249 tss->cr3 = ops->get_cr(ctxt, 3);
1941 tss->eip = c->eip; 2250 tss->eip = c->eip;
1942 tss->eflags = ctxt->eflags; 2251 tss->eflags = ctxt->eflags;
1943 tss->eax = c->regs[VCPU_REGS_RAX]; 2252 tss->eax = c->regs[VCPU_REGS_RAX];
@@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
1949 tss->esi = c->regs[VCPU_REGS_RSI]; 2258 tss->esi = c->regs[VCPU_REGS_RSI];
1950 tss->edi = c->regs[VCPU_REGS_RDI]; 2259 tss->edi = c->regs[VCPU_REGS_RDI];
1951 2260
1952 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); 2261 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
1953 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 2262 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
1954 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); 2263 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
1955 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); 2264 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
1956 tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); 2265 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
1957 tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); 2266 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
1958 tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); 2267 tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR);
1959} 2268}
1960 2269
1961static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, 2270static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
@@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
1965 struct decode_cache *c = &ctxt->decode; 2274 struct decode_cache *c = &ctxt->decode;
1966 int ret; 2275 int ret;
1967 2276
1968 if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) 2277 if (ops->set_cr(ctxt, 3, tss->cr3))
1969 return emulate_gp(ctxt, 0); 2278 return emulate_gp(ctxt, 0);
1970 c->eip = tss->eip; 2279 c->eip = tss->eip;
1971 ctxt->eflags = tss->eflags | 2; 2280 ctxt->eflags = tss->eflags | 2;
@@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
1982 * SDM says that segment selectors are loaded before segment 2291 * SDM says that segment selectors are loaded before segment
1983 * descriptors 2292 * descriptors
1984 */ 2293 */
1985 ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); 2294 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
1986 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); 2295 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
1987 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); 2296 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
1988 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); 2297 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
1989 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); 2298 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
1990 ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); 2299 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
1991 ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); 2300 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
1992 2301
1993 /* 2302 /*
1994 * Now load segment descriptors. If a fault happens at this stage 2303
@@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2028 int ret; 2337 int ret;
2029 u32 new_tss_base = get_desc_base(new_desc); 2338 u32 new_tss_base = get_desc_base(new_desc);
2030 2339
2031 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2340 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2032 &ctxt->exception); 2341 &ctxt->exception);
2033 if (ret != X86EMUL_CONTINUE) 2342 if (ret != X86EMUL_CONTINUE)
2034 /* FIXME: need to provide precise fault address */ 2343 /* FIXME: need to provide precise fault address */
@@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2036 2345
2037 save_state_to_tss32(ctxt, ops, &tss_seg); 2346 save_state_to_tss32(ctxt, ops, &tss_seg);
2038 2347
2039 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2348 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2040 &ctxt->exception); 2349 &ctxt->exception);
2041 if (ret != X86EMUL_CONTINUE) 2350 if (ret != X86EMUL_CONTINUE)
2042 /* FIXME: need to provide precise fault address */ 2351 /* FIXME: need to provide precise fault address */
2043 return ret; 2352 return ret;
2044 2353
2045 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2354 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2046 &ctxt->exception); 2355 &ctxt->exception);
2047 if (ret != X86EMUL_CONTINUE) 2356 if (ret != X86EMUL_CONTINUE)
2048 /* FIXME: need to provide precise fault address */ 2357 /* FIXME: need to provide precise fault address */
@@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2051 if (old_tss_sel != 0xffff) { 2360 if (old_tss_sel != 0xffff) {
2052 tss_seg.prev_task_link = old_tss_sel; 2361 tss_seg.prev_task_link = old_tss_sel;
2053 2362
2054 ret = ops->write_std(new_tss_base, 2363 ret = ops->write_std(ctxt, new_tss_base,
2055 &tss_seg.prev_task_link, 2364 &tss_seg.prev_task_link,
2056 sizeof tss_seg.prev_task_link, 2365 sizeof tss_seg.prev_task_link,
2057 ctxt->vcpu, &ctxt->exception); 2366 &ctxt->exception);
2058 if (ret != X86EMUL_CONTINUE) 2367 if (ret != X86EMUL_CONTINUE)
2059 /* FIXME: need to provide precise fault address */ 2368 /* FIXME: need to provide precise fault address */
2060 return ret; 2369 return ret;
@@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2070{ 2379{
2071 struct desc_struct curr_tss_desc, next_tss_desc; 2380 struct desc_struct curr_tss_desc, next_tss_desc;
2072 int ret; 2381 int ret;
2073 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); 2382 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2074 ulong old_tss_base = 2383 ulong old_tss_base =
2075 ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); 2384 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2076 u32 desc_limit; 2385 u32 desc_limit;
2077 2386
2078 /* FIXME: old_tss_base == ~0 ? */ 2387 /* FIXME: old_tss_base == ~0 ? */
@@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2088 2397
2089 if (reason != TASK_SWITCH_IRET) { 2398 if (reason != TASK_SWITCH_IRET) {
2090 if ((tss_selector & 3) > next_tss_desc.dpl || 2399 if ((tss_selector & 3) > next_tss_desc.dpl ||
2091 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) 2400 ops->cpl(ctxt) > next_tss_desc.dpl)
2092 return emulate_gp(ctxt, 0); 2401 return emulate_gp(ctxt, 0);
2093 } 2402 }
2094 2403
@@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2132 &next_tss_desc); 2441 &next_tss_desc);
2133 } 2442 }
2134 2443
2135 ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); 2444 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
2136 ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); 2445 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
2137 ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2138 2446
2139 if (has_error_code) { 2447 if (has_error_code) {
2140 struct decode_cache *c = &ctxt->decode; 2448 struct decode_cache *c = &ctxt->decode;
@@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2142 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; 2450 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2143 c->lock_prefix = 0; 2451 c->lock_prefix = 0;
2144 c->src.val = (unsigned long) error_code; 2452 c->src.val = (unsigned long) error_code;
2145 emulate_push(ctxt, ops); 2453 ret = em_push(ctxt);
2146 } 2454 }
2147 2455
2148 return ret; 2456 return ret;
@@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2162 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, 2470 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2163 has_error_code, error_code); 2471 has_error_code, error_code);
2164 2472
2165 if (rc == X86EMUL_CONTINUE) { 2473 if (rc == X86EMUL_CONTINUE)
2166 rc = writeback(ctxt, ops); 2474 ctxt->eip = c->eip;
2167 if (rc == X86EMUL_CONTINUE)
2168 ctxt->eip = c->eip;
2169 }
2170 2475
2171 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 2476 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
2172} 2477}
2173 2478
2174static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, 2479static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
@@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
2182 op->addr.mem.seg = seg; 2487 op->addr.mem.seg = seg;
2183} 2488}
2184 2489
2185static int em_push(struct x86_emulate_ctxt *ctxt)
2186{
2187 emulate_push(ctxt, ctxt->ops);
2188 return X86EMUL_CONTINUE;
2189}
2190
2191static int em_das(struct x86_emulate_ctxt *ctxt) 2490static int em_das(struct x86_emulate_ctxt *ctxt)
2192{ 2491{
2193 struct decode_cache *c = &ctxt->decode; 2492 struct decode_cache *c = &ctxt->decode;
@@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
2234 ulong old_eip; 2533 ulong old_eip;
2235 int rc; 2534 int rc;
2236 2535
2237 old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 2536 old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2238 old_eip = c->eip; 2537 old_eip = c->eip;
2239 2538
2240 memcpy(&sel, c->src.valptr + c->op_bytes, 2); 2539 memcpy(&sel, c->src.valptr + c->op_bytes, 2);
@@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
2245 memcpy(&c->eip, c->src.valptr, c->op_bytes); 2544 memcpy(&c->eip, c->src.valptr, c->op_bytes);
2246 2545
2247 c->src.val = old_cs; 2546 c->src.val = old_cs;
2248 emulate_push(ctxt, ctxt->ops); 2547 rc = em_push(ctxt);
2249 rc = writeback(ctxt, ctxt->ops);
2250 if (rc != X86EMUL_CONTINUE) 2548 if (rc != X86EMUL_CONTINUE)
2251 return rc; 2549 return rc;
2252 2550
2253 c->src.val = old_eip; 2551 c->src.val = old_eip;
2254 emulate_push(ctxt, ctxt->ops); 2552 return em_push(ctxt);
2255 rc = writeback(ctxt, ctxt->ops);
2256 if (rc != X86EMUL_CONTINUE)
2257 return rc;
2258
2259 c->dst.type = OP_NONE;
2260
2261 return X86EMUL_CONTINUE;
2262} 2553}
2263 2554
2264static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) 2555static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
@@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2269 c->dst.type = OP_REG; 2560 c->dst.type = OP_REG;
2270 c->dst.addr.reg = &c->eip; 2561 c->dst.addr.reg = &c->eip;
2271 c->dst.bytes = c->op_bytes; 2562 c->dst.bytes = c->op_bytes;
2272 rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); 2563 rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes);
2273 if (rc != X86EMUL_CONTINUE) 2564 if (rc != X86EMUL_CONTINUE)
2274 return rc; 2565 return rc;
2275 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); 2566 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
2276 return X86EMUL_CONTINUE; 2567 return X86EMUL_CONTINUE;
2277} 2568}
2278 2569
2570static int em_add(struct x86_emulate_ctxt *ctxt)
2571{
2572 struct decode_cache *c = &ctxt->decode;
2573
2574 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
2575 return X86EMUL_CONTINUE;
2576}
2577
2578static int em_or(struct x86_emulate_ctxt *ctxt)
2579{
2580 struct decode_cache *c = &ctxt->decode;
2581
2582 emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2583 return X86EMUL_CONTINUE;
2584}
2585
2586static int em_adc(struct x86_emulate_ctxt *ctxt)
2587{
2588 struct decode_cache *c = &ctxt->decode;
2589
2590 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
2591 return X86EMUL_CONTINUE;
2592}
2593
2594static int em_sbb(struct x86_emulate_ctxt *ctxt)
2595{
2596 struct decode_cache *c = &ctxt->decode;
2597
2598 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
2599 return X86EMUL_CONTINUE;
2600}
2601
2602static int em_and(struct x86_emulate_ctxt *ctxt)
2603{
2604 struct decode_cache *c = &ctxt->decode;
2605
2606 emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
2607 return X86EMUL_CONTINUE;
2608}
2609
2610static int em_sub(struct x86_emulate_ctxt *ctxt)
2611{
2612 struct decode_cache *c = &ctxt->decode;
2613
2614 emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
2615 return X86EMUL_CONTINUE;
2616}
2617
2618static int em_xor(struct x86_emulate_ctxt *ctxt)
2619{
2620 struct decode_cache *c = &ctxt->decode;
2621
2622 emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
2623 return X86EMUL_CONTINUE;
2624}
2625
2626static int em_cmp(struct x86_emulate_ctxt *ctxt)
2627{
2628 struct decode_cache *c = &ctxt->decode;
2629
2630 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2631 /* Disable writeback. */
2632 c->dst.type = OP_NONE;
2633 return X86EMUL_CONTINUE;
2634}
2635
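(Aside: the em_add ... em_cmp helpers above give every group-1 ALU operation the uniform executor signature int (*)(struct x86_emulate_ctxt *), so the decode tables can dispatch them directly instead of the old switch over opcode ranges. A toy model of that table-driven dispatch, self-contained C with illustrative names only:

	#include <stdio.h>

	struct toy_ctxt { long src, dst; };

	typedef int (*exec_fn)(struct toy_ctxt *c);

	static int toy_add(struct toy_ctxt *c) { c->dst += c->src; return 0; }
	static int toy_sub(struct toy_ctxt *c) { c->dst -= c->src; return 0; }

	/* One executor per decoded opcode; the decoder stores the pointer
	 * and the execution loop just calls it, as x86_emulate_insn does
	 * with "if (c->execute) rc = c->execute(ctxt);" further down. */
	static const exec_fn dispatch[] = { toy_add, toy_sub };

	int main(void)
	{
		struct toy_ctxt c = { .src = 2, .dst = 40 };

		dispatch[0](&c);	/* "add": c.dst becomes 42 */
		printf("%ld\n", c.dst);
		return 0;
	}
)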
2279static int em_imul(struct x86_emulate_ctxt *ctxt) 2636static int em_imul(struct x86_emulate_ctxt *ctxt)
2280{ 2637{
2281 struct decode_cache *c = &ctxt->decode; 2638 struct decode_cache *c = &ctxt->decode;
@@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
2306 2663
2307static int em_rdtsc(struct x86_emulate_ctxt *ctxt) 2664static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
2308{ 2665{
2309 unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
2310 struct decode_cache *c = &ctxt->decode; 2666 struct decode_cache *c = &ctxt->decode;
2311 u64 tsc = 0; 2667 u64 tsc = 0;
2312 2668
2313 if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) 2669 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
2314 return emulate_gp(ctxt, 0);
2315 ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
2316 c->regs[VCPU_REGS_RAX] = (u32)tsc; 2670 c->regs[VCPU_REGS_RAX] = (u32)tsc;
2317 c->regs[VCPU_REGS_RDX] = tsc >> 32; 2671 c->regs[VCPU_REGS_RDX] = tsc >> 32;
2318 return X86EMUL_CONTINUE; 2672 return X86EMUL_CONTINUE;
@@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
2325 return X86EMUL_CONTINUE; 2679 return X86EMUL_CONTINUE;
2326} 2680}
2327 2681
2682static int em_movdqu(struct x86_emulate_ctxt *ctxt)
2683{
2684 struct decode_cache *c = &ctxt->decode;
2685 memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes);
2686 return X86EMUL_CONTINUE;
2687}
2688
2689static int em_invlpg(struct x86_emulate_ctxt *ctxt)
2690{
2691 struct decode_cache *c = &ctxt->decode;
2692 int rc;
2693 ulong linear;
2694
2695 rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear);
2696 if (rc == X86EMUL_CONTINUE)
2697 ctxt->ops->invlpg(ctxt, linear);
2698 /* Disable writeback. */
2699 c->dst.type = OP_NONE;
2700 return X86EMUL_CONTINUE;
2701}
2702
2703static int em_clts(struct x86_emulate_ctxt *ctxt)
2704{
2705 ulong cr0;
2706
2707 cr0 = ctxt->ops->get_cr(ctxt, 0);
2708 cr0 &= ~X86_CR0_TS;
2709 ctxt->ops->set_cr(ctxt, 0, cr0);
2710 return X86EMUL_CONTINUE;
2711}
2712
2713static int em_vmcall(struct x86_emulate_ctxt *ctxt)
2714{
2715 struct decode_cache *c = &ctxt->decode;
2716 int rc;
2717
2718 if (c->modrm_mod != 3 || c->modrm_rm != 1)
2719 return X86EMUL_UNHANDLEABLE;
2720
2721 rc = ctxt->ops->fix_hypercall(ctxt);
2722 if (rc != X86EMUL_CONTINUE)
2723 return rc;
2724
2725 /* Let the processor re-execute the fixed hypercall */
2726 c->eip = ctxt->eip;
2727 /* Disable writeback. */
2728 c->dst.type = OP_NONE;
2729 return X86EMUL_CONTINUE;
2730}
2731
2732static int em_lgdt(struct x86_emulate_ctxt *ctxt)
2733{
2734 struct decode_cache *c = &ctxt->decode;
2735 struct desc_ptr desc_ptr;
2736 int rc;
2737
2738 rc = read_descriptor(ctxt, c->src.addr.mem,
2739 &desc_ptr.size, &desc_ptr.address,
2740 c->op_bytes);
2741 if (rc != X86EMUL_CONTINUE)
2742 return rc;
2743 ctxt->ops->set_gdt(ctxt, &desc_ptr);
2744 /* Disable writeback. */
2745 c->dst.type = OP_NONE;
2746 return X86EMUL_CONTINUE;
2747}
2748
2749static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
2750{
2751 struct decode_cache *c = &ctxt->decode;
2752 int rc;
2753
2754 rc = ctxt->ops->fix_hypercall(ctxt);
2755
2756 /* Disable writeback. */
2757 c->dst.type = OP_NONE;
2758 return rc;
2759}
2760
2761static int em_lidt(struct x86_emulate_ctxt *ctxt)
2762{
2763 struct decode_cache *c = &ctxt->decode;
2764 struct desc_ptr desc_ptr;
2765 int rc;
2766
2767 rc = read_descriptor(ctxt, c->src.addr.mem,
2768 &desc_ptr.size, &desc_ptr.address,
2769 c->op_bytes);
2770 if (rc != X86EMUL_CONTINUE)
2771 return rc;
2772 ctxt->ops->set_idt(ctxt, &desc_ptr);
2773 /* Disable writeback. */
2774 c->dst.type = OP_NONE;
2775 return X86EMUL_CONTINUE;
2776}
2777
2778static int em_smsw(struct x86_emulate_ctxt *ctxt)
2779{
2780 struct decode_cache *c = &ctxt->decode;
2781
2782 c->dst.bytes = 2;
2783 c->dst.val = ctxt->ops->get_cr(ctxt, 0);
2784 return X86EMUL_CONTINUE;
2785}
2786
2787static int em_lmsw(struct x86_emulate_ctxt *ctxt)
2788{
2789 struct decode_cache *c = &ctxt->decode;
2790 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
2791 | (c->src.val & 0x0f));
2792 c->dst.type = OP_NONE;
2793 return X86EMUL_CONTINUE;
2794}
2795
2796static bool valid_cr(int nr)
2797{
2798 switch (nr) {
2799 case 0:
2800 case 2 ... 4:
2801 case 8:
2802 return true;
2803 default:
2804 return false;
2805 }
2806}
2807
2808static int check_cr_read(struct x86_emulate_ctxt *ctxt)
2809{
2810 struct decode_cache *c = &ctxt->decode;
2811
2812 if (!valid_cr(c->modrm_reg))
2813 return emulate_ud(ctxt);
2814
2815 return X86EMUL_CONTINUE;
2816}
2817
2818static int check_cr_write(struct x86_emulate_ctxt *ctxt)
2819{
2820 struct decode_cache *c = &ctxt->decode;
2821 u64 new_val = c->src.val64;
2822 int cr = c->modrm_reg;
2823 u64 efer = 0;
2824
2825 static u64 cr_reserved_bits[] = {
2826 0xffffffff00000000ULL,
2827 0, 0, 0, /* CR3 checked later */
2828 CR4_RESERVED_BITS,
2829 0, 0, 0,
2830 CR8_RESERVED_BITS,
2831 };
2832
2833 if (!valid_cr(cr))
2834 return emulate_ud(ctxt);
2835
2836 if (new_val & cr_reserved_bits[cr])
2837 return emulate_gp(ctxt, 0);
2838
2839 switch (cr) {
2840 case 0: {
2841 u64 cr4;
2842 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
2843 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
2844 return emulate_gp(ctxt, 0);
2845
2846 cr4 = ctxt->ops->get_cr(ctxt, 4);
2847 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
2848
2849 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
2850 !(cr4 & X86_CR4_PAE))
2851 return emulate_gp(ctxt, 0);
2852
2853 break;
2854 }
2855 case 3: {
2856 u64 rsvd = 0;
2857
2858 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
2859 if (efer & EFER_LMA)
2860 rsvd = CR3_L_MODE_RESERVED_BITS;
2861 else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE)
2862 rsvd = CR3_PAE_RESERVED_BITS;
2863 else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG)
2864 rsvd = CR3_NONPAE_RESERVED_BITS;
2865
2866 if (new_val & rsvd)
2867 return emulate_gp(ctxt, 0);
2868
2869 break;
2870 }
2871 case 4: {
2872 u64 cr4;
2873
2874 cr4 = ctxt->ops->get_cr(ctxt, 4);
2875 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
2876
2877 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
2878 return emulate_gp(ctxt, 0);
2879
2880 break;
2881 }
2882 }
2883
2884 return X86EMUL_CONTINUE;
2885}
2886
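(Aside: check_cr_write() combines a static reserved-bits table with mode-dependent consistency rules. The CR0 rules alone, restated as a standalone predicate; bit positions are from the SDM and cr0_value_ok is an illustrative name:

	#include <stdint.h>
	#include <stdbool.h>

	#define CR0_PE (1ULL << 0)	/* protection enable */
	#define CR0_NW (1ULL << 29)	/* not write-through */
	#define CR0_CD (1ULL << 30)	/* cache disable */
	#define CR0_PG (1ULL << 31)	/* paging */

	/* Mirrors the cr == 0 case above: paging requires protected mode,
	 * and NW=1 with CD=0 is an invalid cache configuration. */
	static bool cr0_value_ok(uint64_t val)
	{
		if ((val & CR0_PG) && !(val & CR0_PE))
			return false;
		if ((val & CR0_NW) && !(val & CR0_CD))
			return false;
		return true;
	}

So writing 0x80000001 (PG|PE) passes this check, while 0x80000000 (PG alone) takes the emulate_gp(ctxt, 0) path. The long-mode rule additionally needs EFER.LME and CR4.PAE, as the kernel code shows.)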
2887static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
2888{
2889 unsigned long dr7;
2890
2891 ctxt->ops->get_dr(ctxt, 7, &dr7);
2892
2893 /* Check if DR7.Global_Enable is set */
2894 return dr7 & (1 << 13);
2895}
2896
2897static int check_dr_read(struct x86_emulate_ctxt *ctxt)
2898{
2899 struct decode_cache *c = &ctxt->decode;
2900 int dr = c->modrm_reg;
2901 u64 cr4;
2902
2903 if (dr > 7)
2904 return emulate_ud(ctxt);
2905
2906 cr4 = ctxt->ops->get_cr(ctxt, 4);
2907 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
2908 return emulate_ud(ctxt);
2909
2910 if (check_dr7_gd(ctxt))
2911 return emulate_db(ctxt);
2912
2913 return X86EMUL_CONTINUE;
2914}
2915
2916static int check_dr_write(struct x86_emulate_ctxt *ctxt)
2917{
2918 struct decode_cache *c = &ctxt->decode;
2919 u64 new_val = c->src.val64;
2920 int dr = c->modrm_reg;
2921
2922 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
2923 return emulate_gp(ctxt, 0);
2924
2925 return check_dr_read(ctxt);
2926}
2927
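(Aside: the debug-register checks encode two SDM rules: DR4/DR5 alias DR6/DR7 only while CR4.DE is clear (with CR4.DE set they raise #UD), and DR7.GD, bit 13, turns any debug-register access into #DB. A compact restatement; the return codes here are illustrative, not the emulator's:

	#include <stdint.h>

	#define CR4_DE	(1ULL << 3)
	#define DR7_GD	(1ULL << 13)

	/* 0 = allowed, 1 = raise #UD, 2 = raise #DB; mirrors check_dr_read(). */
	static int dr_access_check(int dr, uint64_t cr4, uint64_t dr7)
	{
		if (dr > 7)
			return 1;			/* no such register */
		if ((cr4 & CR4_DE) && (dr == 4 || dr == 5))
			return 1;			/* DR4/DR5 reserved with CR4.DE */
		if (dr7 & DR7_GD)
			return 2;			/* general detect */
		return 0;
	}
)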
2928static int check_svme(struct x86_emulate_ctxt *ctxt)
2929{
2930 u64 efer;
2931
2932 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
2933
2934 if (!(efer & EFER_SVME))
2935 return emulate_ud(ctxt);
2936
2937 return X86EMUL_CONTINUE;
2938}
2939
2940static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
2941{
2942 u64 rax = ctxt->decode.regs[VCPU_REGS_RAX];
2943
2944 /* Valid physical address? */
2945 if (rax & 0xffff000000000000ULL)
2946 return emulate_gp(ctxt, 0);
2947
2948 return check_svme(ctxt);
2949}
2950
2951static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
2952{
2953 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
2954
2955 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
2956 return emulate_ud(ctxt);
2957
2958 return X86EMUL_CONTINUE;
2959}
2960
2961static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
2962{
2963 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
2964 u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX];
2965
2966 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
2967 (rcx > 3))
2968 return emulate_gp(ctxt, 0);
2969
2970 return X86EMUL_CONTINUE;
2971}
2972
2973static int check_perm_in(struct x86_emulate_ctxt *ctxt)
2974{
2975 struct decode_cache *c = &ctxt->decode;
2976
2977 c->dst.bytes = min(c->dst.bytes, 4u);
2978 if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes))
2979 return emulate_gp(ctxt, 0);
2980
2981 return X86EMUL_CONTINUE;
2982}
2983
2984static int check_perm_out(struct x86_emulate_ctxt *ctxt)
2985{
2986 struct decode_cache *c = &ctxt->decode;
2987
2988 c->src.bytes = min(c->src.bytes, 4u);
2989 if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes))
2990 return emulate_gp(ctxt, 0);
2991
2992 return X86EMUL_CONTINUE;
2993}
2994
2328#define D(_y) { .flags = (_y) } 2995#define D(_y) { .flags = (_y) }
2996#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
2997#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
2998 .check_perm = (_p) }
2329#define N D(0) 2999#define N D(0)
3000#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
2330#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } 3001#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
2331#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } 3002#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) }
2332#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3003#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3004#define II(_f, _e, _i) \
3005 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
3006#define IIP(_f, _e, _i, _p) \
3007 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
3008 .check_perm = (_p) }
3009#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
2333 3010
2334#define D2bv(_f) D((_f) | ByteOp), D(_f) 3011#define D2bv(_f) D((_f) | ByteOp), D(_f)
3012#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
2335#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) 3013#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
2336 3014
2337#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ 3015#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \
2338 D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ 3016 I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
2339 D2bv(((_f) & ~Lock) | DstAcc | SrcImm) 3017 I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
2340 3018
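(Aside: the new DI/DIP/II/IIP constructors extend decode-table entries with an intercept id and an optional permission callback; they are plain designated initializers. A simplified model of what such an entry carries; the real struct opcode also has the group/gdual/gprefix union, so this is a sketch, not the kernel layout:

	struct opcode_model {
		unsigned int flags;		/* decode flags (SrcNone, ModRM, ...) */
		int (*execute)(void *ctxt);	/* em_* executor, if any */
		int (*check_perm)(void *ctxt);	/* per-insn permission hook */
		int intercept;			/* x86_intercept_* id for nested virt */
	};

	/* e.g. IIP(f, e, i, p) expands to roughly: */
	#define MODEL_IIP(_f, _e, _i, _p) \
		{ .flags = (_f), .execute = (_e), .check_perm = (_p), .intercept = (_i) }
)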
3019static struct opcode group7_rm1[] = {
3020 DI(SrcNone | ModRM | Priv, monitor),
3021 DI(SrcNone | ModRM | Priv, mwait),
3022 N, N, N, N, N, N,
3023};
3024
3025static struct opcode group7_rm3[] = {
3026 DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa),
3027 II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall),
3028 DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa),
3029 DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa),
3030 DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme),
3031 DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme),
3032 DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme),
3033 DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme),
3034};
3035
3036static struct opcode group7_rm7[] = {
3037 N,
3038 DIP(SrcNone | ModRM, rdtscp, check_rdtsc),
3039 N, N, N, N, N, N,
3040};
2341 3041
2342static struct opcode group1[] = { 3042static struct opcode group1[] = {
2343 X7(D(Lock)), N 3043 I(Lock, em_add),
3044 I(Lock, em_or),
3045 I(Lock, em_adc),
3046 I(Lock, em_sbb),
3047 I(Lock, em_and),
3048 I(Lock, em_sub),
3049 I(Lock, em_xor),
3050 I(0, em_cmp),
2344}; 3051};
2345 3052
2346static struct opcode group1A[] = { 3053static struct opcode group1A[] = {
@@ -2366,16 +3073,28 @@ static struct opcode group5[] = {
2366 D(SrcMem | ModRM | Stack), N, 3073 D(SrcMem | ModRM | Stack), N,
2367}; 3074};
2368 3075
3076static struct opcode group6[] = {
3077 DI(ModRM | Prot, sldt),
3078 DI(ModRM | Prot, str),
3079 DI(ModRM | Prot | Priv, lldt),
3080 DI(ModRM | Prot | Priv, ltr),
3081 N, N, N, N,
3082};
3083
2369static struct group_dual group7 = { { 3084static struct group_dual group7 = { {
2370 N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), 3085 DI(ModRM | Mov | DstMem | Priv, sgdt),
2371 D(SrcNone | ModRM | DstMem | Mov), N, 3086 DI(ModRM | Mov | DstMem | Priv, sidt),
2372 D(SrcMem16 | ModRM | Mov | Priv), 3087 II(ModRM | SrcMem | Priv, em_lgdt, lgdt),
2373 D(SrcMem | ModRM | ByteOp | Priv | NoAccess), 3088 II(ModRM | SrcMem | Priv, em_lidt, lidt),
3089 II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
3090 II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw),
3091 II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
2374}, { 3092}, {
2375 D(SrcNone | ModRM | Priv | VendorSpecific), N, 3093 I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall),
2376 N, D(SrcNone | ModRM | Priv | VendorSpecific), 3094 EXT(0, group7_rm1),
2377 D(SrcNone | ModRM | DstMem | Mov), N, 3095 N, EXT(0, group7_rm3),
2378 D(SrcMem16 | ModRM | Mov | Priv), N, 3096 II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
3097 II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7),
2379} }; 3098} };
2380 3099
2381static struct opcode group8[] = { 3100static struct opcode group8[] = {
@@ -2394,35 +3113,40 @@ static struct opcode group11[] = {
2394 I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), 3113 I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
2395}; 3114};
2396 3115
3116static struct gprefix pfx_0f_6f_0f_7f = {
3117 N, N, N, I(Sse, em_movdqu),
3118};
3119
2397static struct opcode opcode_table[256] = { 3120static struct opcode opcode_table[256] = {
2398 /* 0x00 - 0x07 */ 3121 /* 0x00 - 0x07 */
2399 D6ALU(Lock), 3122 I6ALU(Lock, em_add),
2400 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 3123 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2401 /* 0x08 - 0x0F */ 3124 /* 0x08 - 0x0F */
2402 D6ALU(Lock), 3125 I6ALU(Lock, em_or),
2403 D(ImplicitOps | Stack | No64), N, 3126 D(ImplicitOps | Stack | No64), N,
2404 /* 0x10 - 0x17 */ 3127 /* 0x10 - 0x17 */
2405 D6ALU(Lock), 3128 I6ALU(Lock, em_adc),
2406 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 3129 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2407 /* 0x18 - 0x1F */ 3130 /* 0x18 - 0x1F */
2408 D6ALU(Lock), 3131 I6ALU(Lock, em_sbb),
2409 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 3132 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2410 /* 0x20 - 0x27 */ 3133 /* 0x20 - 0x27 */
2411 D6ALU(Lock), N, N, 3134 I6ALU(Lock, em_and), N, N,
2412 /* 0x28 - 0x2F */ 3135 /* 0x28 - 0x2F */
2413 D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), 3136 I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
2414 /* 0x30 - 0x37 */ 3137 /* 0x30 - 0x37 */
2415 D6ALU(Lock), N, N, 3138 I6ALU(Lock, em_xor), N, N,
2416 /* 0x38 - 0x3F */ 3139 /* 0x38 - 0x3F */
2417 D6ALU(0), N, N, 3140 I6ALU(0, em_cmp), N, N,
2418 /* 0x40 - 0x4F */ 3141 /* 0x40 - 0x4F */
2419 X16(D(DstReg)), 3142 X16(D(DstReg)),
2420 /* 0x50 - 0x57 */ 3143 /* 0x50 - 0x57 */
2421 X8(I(SrcReg | Stack, em_push)), 3144 X8(I(SrcReg | Stack, em_push)),
2422 /* 0x58 - 0x5F */ 3145 /* 0x58 - 0x5F */
2423 X8(D(DstReg | Stack)), 3146 X8(I(DstReg | Stack, em_pop)),
2424 /* 0x60 - 0x67 */ 3147 /* 0x60 - 0x67 */
2425 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 3148 I(ImplicitOps | Stack | No64, em_pusha),
3149 I(ImplicitOps | Stack | No64, em_popa),
2426 N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , 3150 N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
2427 N, N, N, N, 3151 N, N, N, N,
2428 /* 0x68 - 0x6F */ 3152 /* 0x68 - 0x6F */
@@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = {
2430 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), 3154 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
2431 I(SrcImmByte | Mov | Stack, em_push), 3155 I(SrcImmByte | Mov | Stack, em_push),
2432 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), 3156 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
2433 D2bv(DstDI | Mov | String), /* insb, insw/insd */ 3157 D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */
2434 D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ 3158 D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */
2435 /* 0x70 - 0x7F */ 3159 /* 0x70 - 0x7F */
2436 X16(D(SrcImmByte)), 3160 X16(D(SrcImmByte)),
2437 /* 0x80 - 0x87 */ 3161 /* 0x80 - 0x87 */
@@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = {
2446 D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), 3170 D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
2447 D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), 3171 D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
2448 /* 0x90 - 0x97 */ 3172 /* 0x90 - 0x97 */
2449 X8(D(SrcAcc | DstReg)), 3173 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
2450 /* 0x98 - 0x9F */ 3174 /* 0x98 - 0x9F */
2451 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), 3175 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
2452 I(SrcImmFAddr | No64, em_call_far), N, 3176 I(SrcImmFAddr | No64, em_call_far), N,
2453 D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, 3177 II(ImplicitOps | Stack, em_pushf, pushf),
3178 II(ImplicitOps | Stack, em_popf, popf), N, N,
2454 /* 0xA0 - 0xA7 */ 3179 /* 0xA0 - 0xA7 */
2455 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3180 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
2456 I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), 3181 I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
2457 I2bv(SrcSI | DstDI | Mov | String, em_mov), 3182 I2bv(SrcSI | DstDI | Mov | String, em_mov),
2458 D2bv(SrcSI | DstDI | String), 3183 I2bv(SrcSI | DstDI | String, em_cmp),
2459 /* 0xA8 - 0xAF */ 3184 /* 0xA8 - 0xAF */
2460 D2bv(DstAcc | SrcImm), 3185 D2bv(DstAcc | SrcImm),
2461 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 3186 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
2462 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 3187 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
2463 D2bv(SrcAcc | DstDI | String), 3188 I2bv(SrcAcc | DstDI | String, em_cmp),
2464 /* 0xB0 - 0xB7 */ 3189 /* 0xB0 - 0xB7 */
2465 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 3190 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
2466 /* 0xB8 - 0xBF */ 3191 /* 0xB8 - 0xBF */
@@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = {
2473 G(ByteOp, group11), G(0, group11), 3198 G(ByteOp, group11), G(0, group11),
2474 /* 0xC8 - 0xCF */ 3199 /* 0xC8 - 0xCF */
2475 N, N, N, D(ImplicitOps | Stack), 3200 N, N, N, D(ImplicitOps | Stack),
2476 D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), 3201 D(ImplicitOps), DI(SrcImmByte, intn),
3202 D(ImplicitOps | No64), DI(ImplicitOps, iret),
2477 /* 0xD0 - 0xD7 */ 3203 /* 0xD0 - 0xD7 */
2478 D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), 3204 D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
2479 N, N, N, N, 3205 N, N, N, N,
@@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = {
2481 N, N, N, N, N, N, N, N, 3207 N, N, N, N, N, N, N, N,
2482 /* 0xE0 - 0xE7 */ 3208 /* 0xE0 - 0xE7 */
2483 X4(D(SrcImmByte)), 3209 X4(D(SrcImmByte)),
2484 D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), 3210 D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in),
3211 D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out),
2485 /* 0xE8 - 0xEF */ 3212 /* 0xE8 - 0xEF */
2486 D(SrcImm | Stack), D(SrcImm | ImplicitOps), 3213 D(SrcImm | Stack), D(SrcImm | ImplicitOps),
2487 D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), 3214 D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
2488 D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), 3215 D2bvIP(SrcNone | DstAcc, in, check_perm_in),
3216 D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out),
2489 /* 0xF0 - 0xF7 */ 3217 /* 0xF0 - 0xF7 */
2490 N, N, N, N, 3218 N, DI(ImplicitOps, icebp), N, N,
2491 D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), 3219 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
3220 G(ByteOp, group3), G(0, group3),
2492 /* 0xF8 - 0xFF */ 3221 /* 0xF8 - 0xFF */
2493 D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), 3222 D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
2494 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), 3223 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
@@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = {
2496 3225
2497static struct opcode twobyte_table[256] = { 3226static struct opcode twobyte_table[256] = {
2498 /* 0x00 - 0x0F */ 3227 /* 0x00 - 0x0F */
2499 N, GD(0, &group7), N, N, 3228 G(0, group6), GD(0, &group7), N, N,
2500 N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, 3229 N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N,
2501 D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, 3230 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
2502 N, D(ImplicitOps | ModRM), N, N, 3231 N, D(ImplicitOps | ModRM), N, N,
2503 /* 0x10 - 0x1F */ 3232 /* 0x10 - 0x1F */
2504 N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, 3233 N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
2505 /* 0x20 - 0x2F */ 3234 /* 0x20 - 0x2F */
2506 D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), 3235 DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
2507 D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), 3236 DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
3237 DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write),
3238 DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write),
2508 N, N, N, N, 3239 N, N, N, N,
2509 N, N, N, N, N, N, N, N, 3240 N, N, N, N, N, N, N, N,
2510 /* 0x30 - 0x3F */ 3241 /* 0x30 - 0x3F */
2511 D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), 3242 DI(ImplicitOps | Priv, wrmsr),
2512 D(ImplicitOps | Priv), N, 3243 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
3244 DI(ImplicitOps | Priv, rdmsr),
3245 DIP(ImplicitOps | Priv, rdpmc, check_rdpmc),
2513 D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), 3246 D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific),
2514 N, N, 3247 N, N,
2515 N, N, N, N, N, N, N, N, 3248 N, N, N, N, N, N, N, N,
@@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = {
2518 /* 0x50 - 0x5F */ 3251 /* 0x50 - 0x5F */
2519 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3252 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2520 /* 0x60 - 0x6F */ 3253 /* 0x60 - 0x6F */
2521 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3254 N, N, N, N,
3255 N, N, N, N,
3256 N, N, N, N,
3257 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
2522 /* 0x70 - 0x7F */ 3258 /* 0x70 - 0x7F */
2523 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3259 N, N, N, N,
3260 N, N, N, N,
3261 N, N, N, N,
3262 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
2524 /* 0x80 - 0x8F */ 3263 /* 0x80 - 0x8F */
2525 X16(D(SrcImm)), 3264 X16(D(SrcImm)),
2526 /* 0x90 - 0x9F */ 3265 /* 0x90 - 0x9F */
2527 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), 3266 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
2528 /* 0xA0 - 0xA7 */ 3267 /* 0xA0 - 0xA7 */
2529 D(ImplicitOps | Stack), D(ImplicitOps | Stack), 3268 D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2530 N, D(DstMem | SrcReg | ModRM | BitOp), 3269 DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp),
2531 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3270 D(DstMem | SrcReg | Src2ImmByte | ModRM),
2532 D(DstMem | SrcReg | Src2CL | ModRM), N, N, 3271 D(DstMem | SrcReg | Src2CL | ModRM), N, N,
2533 /* 0xA8 - 0xAF */ 3272 /* 0xA8 - 0xAF */
2534 D(ImplicitOps | Stack), D(ImplicitOps | Stack), 3273 D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2535 N, D(DstMem | SrcReg | ModRM | BitOp | Lock), 3274 DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2536 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3275 D(DstMem | SrcReg | Src2ImmByte | ModRM),
2537 D(DstMem | SrcReg | Src2CL | ModRM), 3276 D(DstMem | SrcReg | Src2CL | ModRM),
2538 D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), 3277 D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
@@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = {
2564#undef G 3303#undef G
2565#undef GD 3304#undef GD
2566#undef I 3305#undef I
3306#undef GP
3307#undef EXT
2567 3308
2568#undef D2bv 3309#undef D2bv
3310#undef D2bvIP
2569#undef I2bv 3311#undef I2bv
2570#undef D6ALU 3312#undef I6ALU
2571 3313
2572static unsigned imm_size(struct decode_cache *c) 3314static unsigned imm_size(struct decode_cache *c)
2573{ 3315{
@@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
2625 struct decode_cache *c = &ctxt->decode; 3367 struct decode_cache *c = &ctxt->decode;
2626 int rc = X86EMUL_CONTINUE; 3368 int rc = X86EMUL_CONTINUE;
2627 int mode = ctxt->mode; 3369 int mode = ctxt->mode;
2628 int def_op_bytes, def_ad_bytes, dual, goffset; 3370 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
2629 struct opcode opcode, *g_mod012, *g_mod3; 3371 bool op_prefix = false;
3372 struct opcode opcode;
2630 struct operand memop = { .type = OP_NONE }; 3373 struct operand memop = { .type = OP_NONE };
2631 3374
2632 c->eip = ctxt->eip; 3375 c->eip = ctxt->eip;
@@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
2634 c->fetch.end = c->fetch.start + insn_len; 3377 c->fetch.end = c->fetch.start + insn_len;
2635 if (insn_len > 0) 3378 if (insn_len > 0)
2636 memcpy(c->fetch.data, insn, insn_len); 3379 memcpy(c->fetch.data, insn, insn_len);
2637 ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
2638 3380
2639 switch (mode) { 3381 switch (mode) {
2640 case X86EMUL_MODE_REAL: 3382 case X86EMUL_MODE_REAL:
@@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
2662 for (;;) { 3404 for (;;) {
2663 switch (c->b = insn_fetch(u8, 1, c->eip)) { 3405 switch (c->b = insn_fetch(u8, 1, c->eip)) {
2664 case 0x66: /* operand-size override */ 3406 case 0x66: /* operand-size override */
3407 op_prefix = true;
2665 /* switch between 2/4 bytes */ 3408 /* switch between 2/4 bytes */
2666 c->op_bytes = def_op_bytes ^ 6; 3409 c->op_bytes = def_op_bytes ^ 6;
2667 break; 3410 break;
@@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
2692 c->lock_prefix = 1; 3435 c->lock_prefix = 1;
2693 break; 3436 break;
2694 case 0xf2: /* REPNE/REPNZ */ 3437 case 0xf2: /* REPNE/REPNZ */
2695 c->rep_prefix = REPNE_PREFIX;
2696 break;
2697 case 0xf3: /* REP/REPE/REPZ */ 3438 case 0xf3: /* REP/REPE/REPZ */
2698 c->rep_prefix = REPE_PREFIX; 3439 c->rep_prefix = c->b;
2699 break; 3440 break;
2700 default: 3441 default:
2701 goto done_prefixes; 3442 goto done_prefixes;
@@ -2722,29 +3463,49 @@ done_prefixes:
2722 } 3463 }
2723 c->d = opcode.flags; 3464 c->d = opcode.flags;
2724 3465
2725 if (c->d & Group) { 3466 while (c->d & GroupMask) {
2726 dual = c->d & GroupDual; 3467 switch (c->d & GroupMask) {
2727 c->modrm = insn_fetch(u8, 1, c->eip); 3468 case Group:
2728 --c->eip; 3469 c->modrm = insn_fetch(u8, 1, c->eip);
2729 3470 --c->eip;
2730 if (c->d & GroupDual) { 3471 goffset = (c->modrm >> 3) & 7;
2731 g_mod012 = opcode.u.gdual->mod012; 3472 opcode = opcode.u.group[goffset];
2732 g_mod3 = opcode.u.gdual->mod3; 3473 break;
2733 } else 3474 case GroupDual:
2734 g_mod012 = g_mod3 = opcode.u.group; 3475 c->modrm = insn_fetch(u8, 1, c->eip);
2735 3476 --c->eip;
2736 c->d &= ~(Group | GroupDual); 3477 goffset = (c->modrm >> 3) & 7;
2737 3478 if ((c->modrm >> 6) == 3)
2738 goffset = (c->modrm >> 3) & 7; 3479 opcode = opcode.u.gdual->mod3[goffset];
3480 else
3481 opcode = opcode.u.gdual->mod012[goffset];
3482 break;
3483 case RMExt:
3484 goffset = c->modrm & 7;
3485 opcode = opcode.u.group[goffset];
3486 break;
3487 case Prefix:
3488 if (c->rep_prefix && op_prefix)
3489 return X86EMUL_UNHANDLEABLE;
3490 simd_prefix = op_prefix ? 0x66 : c->rep_prefix;
3491 switch (simd_prefix) {
3492 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
3493 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
3494 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
3495 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
3496 }
3497 break;
3498 default:
3499 return X86EMUL_UNHANDLEABLE;
3500 }
2739 3501
2740 if ((c->modrm >> 6) == 3) 3502 c->d &= ~GroupMask;
2741 opcode = g_mod3[goffset];
2742 else
2743 opcode = g_mod012[goffset];
2744 c->d |= opcode.flags; 3503 c->d |= opcode.flags;
2745 } 3504 }
2746 3505
2747 c->execute = opcode.u.execute; 3506 c->execute = opcode.u.execute;
3507 c->check_perm = opcode.check_perm;
3508 c->intercept = opcode.intercept;
2748 3509
2749 /* Unrecognised? */ 3510 /* Unrecognised? */
2750 if (c->d == 0 || (c->d & Undefined)) 3511 if (c->d == 0 || (c->d & Undefined))
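(Aside: the rewritten loop above resolves GroupMask one level at a time, re-slicing the same ModRM byte at each level. For reference, the field layout the shifts implement, mod[7:6] reg[5:3] rm[2:0], as standalone helpers:

	#include <stdint.h>

	static inline unsigned int modrm_mod(uint8_t m) { return m >> 6; }
	static inline unsigned int modrm_reg(uint8_t m) { return (m >> 3) & 7; }
	static inline unsigned int modrm_rm(uint8_t m)  { return m & 7; }

	/* Example: rdtscp is 0f 01 f9. For ModRM 0xf9, mod = 3, reg = 7 and
	 * rm = 1, so GroupDual picks group7's mod3 table, slot 7 there is
	 * the RMExt entry, and rm = 1 lands on the rdtscp row of
	 * group7_rm7. */
)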
@@ -2763,6 +3524,9 @@ done_prefixes:
2763 c->op_bytes = 4; 3524 c->op_bytes = 4;
2764 } 3525 }
2765 3526
3527 if (c->d & Sse)
3528 c->op_bytes = 16;
3529
2766 /* ModRM and SIB bytes. */ 3530 /* ModRM and SIB bytes. */
2767 if (c->d & ModRM) { 3531 if (c->d & ModRM) {
2768 rc = decode_modrm(ctxt, ops, &memop); 3532 rc = decode_modrm(ctxt, ops, &memop);
@@ -2776,7 +3540,7 @@ done_prefixes:
2776 if (!c->has_seg_override) 3540 if (!c->has_seg_override)
2777 set_seg_override(c, VCPU_SREG_DS); 3541 set_seg_override(c, VCPU_SREG_DS);
2778 3542
2779 memop.addr.mem.seg = seg_override(ctxt, ops, c); 3543 memop.addr.mem.seg = seg_override(ctxt, c);
2780 3544
2781 if (memop.type == OP_MEM && c->ad_bytes != 8) 3545 if (memop.type == OP_MEM && c->ad_bytes != 8)
2782 memop.addr.mem.ea = (u32)memop.addr.mem.ea; 3546 memop.addr.mem.ea = (u32)memop.addr.mem.ea;
@@ -2792,7 +3556,7 @@ done_prefixes:
2792 case SrcNone: 3556 case SrcNone:
2793 break; 3557 break;
2794 case SrcReg: 3558 case SrcReg:
2795 decode_register_operand(&c->src, c, 0); 3559 decode_register_operand(ctxt, &c->src, c, 0);
2796 break; 3560 break;
2797 case SrcMem16: 3561 case SrcMem16:
2798 memop.bytes = 2; 3562 memop.bytes = 2;
@@ -2836,7 +3600,7 @@ done_prefixes:
2836 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 3600 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2837 c->src.addr.mem.ea = 3601 c->src.addr.mem.ea =
2838 register_address(c, c->regs[VCPU_REGS_RSI]); 3602 register_address(c, c->regs[VCPU_REGS_RSI]);
2839 c->src.addr.mem.seg = seg_override(ctxt, ops, c), 3603 c->src.addr.mem.seg = seg_override(ctxt, c);
2840 c->src.val = 0; 3604 c->src.val = 0;
2841 break; 3605 break;
2842 case SrcImmFAddr: 3606 case SrcImmFAddr:
@@ -2883,7 +3647,7 @@ done_prefixes:
2883 /* Decode and fetch the destination operand: register or memory. */ 3647 /* Decode and fetch the destination operand: register or memory. */
2884 switch (c->d & DstMask) { 3648 switch (c->d & DstMask) {
2885 case DstReg: 3649 case DstReg:
2886 decode_register_operand(&c->dst, c, 3650 decode_register_operand(ctxt, &c->dst, c,
2887 c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); 3651 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
2888 break; 3652 break;
2889 case DstImmUByte: 3653 case DstImmUByte:
@@ -2926,7 +3690,7 @@ done_prefixes:
2926 } 3690 }
2927 3691
2928done: 3692done:
2929 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 3693 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
2930} 3694}
2931 3695
2932static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) 3696static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
@@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
2979 goto done; 3743 goto done;
2980 } 3744 }
2981 3745
3746 if ((c->d & Sse)
3747 && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)
3748 || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
3749 rc = emulate_ud(ctxt);
3750 goto done;
3751 }
3752
3753 if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
3754 rc = emulate_nm(ctxt);
3755 goto done;
3756 }
3757
3758 if (unlikely(ctxt->guest_mode) && c->intercept) {
3759 rc = emulator_check_intercept(ctxt, c->intercept,
3760 X86_ICPT_PRE_EXCEPT);
3761 if (rc != X86EMUL_CONTINUE)
3762 goto done;
3763 }
3764
2982 /* Privileged instruction can be executed only in CPL=0 */ 3765 /* Privileged instruction can be executed only in CPL=0 */
2983 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { 3766 if ((c->d & Priv) && ops->cpl(ctxt)) {
2984 rc = emulate_gp(ctxt, 0); 3767 rc = emulate_gp(ctxt, 0);
2985 goto done; 3768 goto done;
2986 } 3769 }
2987 3770
3771 /* Instruction can only be executed in protected mode */
3772 if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
3773 rc = emulate_ud(ctxt);
3774 goto done;
3775 }
3776
3777 /* Do instruction specific permission checks */
3778 if (c->check_perm) {
3779 rc = c->check_perm(ctxt);
3780 if (rc != X86EMUL_CONTINUE)
3781 goto done;
3782 }
3783
3784 if (unlikely(ctxt->guest_mode) && c->intercept) {
3785 rc = emulator_check_intercept(ctxt, c->intercept,
3786 X86_ICPT_POST_EXCEPT);
3787 if (rc != X86EMUL_CONTINUE)
3788 goto done;
3789 }
3790
2988 if (c->rep_prefix && (c->d & String)) { 3791 if (c->rep_prefix && (c->d & String)) {
2989 /* All REP prefixes have the same first termination condition */ 3792 /* All REP prefixes have the same first termination condition */
2990 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { 3793 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
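(Aside: with ctxt->guest_mode set, the same intercept is now checked at three checkpoints: X86_ICPT_PRE_EXCEPT here, X86_ICPT_POST_EXCEPT after the privilege and permission tests, and X86_ICPT_POST_MEMACCESS at special_insn, so a nested hypervisor observes the exit where real hardware would deliver it. Schematically; in the real code the stages are interleaved with decoding and memory access rather than run back to back, and the names below are illustrative:

	enum icpt_stage { ICPT_PRE_EXCEPT, ICPT_POST_EXCEPT, ICPT_POST_MEMACCESS };

	typedef int (*icpt_hook)(int intercept, enum icpt_stage stage);

	/* Any non-zero return aborts emulation, which is how the nested
	 * VMEXIT is injected instead of the instruction's effect. */
	static int run_intercept_checks(icpt_hook hook, int intercept)
	{
		static const enum icpt_stage stages[] = {
			ICPT_PRE_EXCEPT, ICPT_POST_EXCEPT, ICPT_POST_MEMACCESS,
		};
		unsigned int i;

		for (i = 0; i < 3; i++) {
			int rc = hook(intercept, stages[i]);
			if (rc)
				return rc;
		}
		return 0;
	}
)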
@@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
2994 } 3797 }
2995 3798
2996 if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { 3799 if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
2997 rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), 3800 rc = segmented_read(ctxt, c->src.addr.mem,
2998 c->src.valptr, c->src.bytes); 3801 c->src.valptr, c->src.bytes);
2999 if (rc != X86EMUL_CONTINUE) 3802 if (rc != X86EMUL_CONTINUE)
3000 goto done; 3803 goto done;
3001 c->src.orig_val64 = c->src.val64; 3804 c->src.orig_val64 = c->src.val64;
3002 } 3805 }
3003 3806
3004 if (c->src2.type == OP_MEM) { 3807 if (c->src2.type == OP_MEM) {
3005 rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), 3808 rc = segmented_read(ctxt, c->src2.addr.mem,
3006 &c->src2.val, c->src2.bytes); 3809 &c->src2.val, c->src2.bytes);
3007 if (rc != X86EMUL_CONTINUE) 3810 if (rc != X86EMUL_CONTINUE)
3008 goto done; 3811 goto done;
3009 } 3812 }
@@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
3014 3817
3015 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { 3818 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
3016 /* optimisation - avoid slow emulated read if Mov */ 3819 /* optimisation - avoid slow emulated read if Mov */
3017 rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), 3820 rc = segmented_read(ctxt, c->dst.addr.mem,
3018 &c->dst.val, c->dst.bytes); 3821 &c->dst.val, c->dst.bytes);
3019 if (rc != X86EMUL_CONTINUE) 3822 if (rc != X86EMUL_CONTINUE)
3020 goto done; 3823 goto done;
@@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
3023 3826
3024special_insn: 3827special_insn:
3025 3828
3829 if (unlikely(ctxt->guest_mode) && c->intercept) {
3830 rc = emulator_check_intercept(ctxt, c->intercept,
3831 X86_ICPT_POST_MEMACCESS);
3832 if (rc != X86EMUL_CONTINUE)
3833 goto done;
3834 }
3835
3026 if (c->execute) { 3836 if (c->execute) {
3027 rc = c->execute(ctxt); 3837 rc = c->execute(ctxt);
3028 if (rc != X86EMUL_CONTINUE) 3838 if (rc != X86EMUL_CONTINUE)
@@ -3034,75 +3844,33 @@ special_insn:
3034 goto twobyte_insn; 3844 goto twobyte_insn;
3035 3845
3036 switch (c->b) { 3846 switch (c->b) {
3037 case 0x00 ... 0x05:
3038 add: /* add */
3039 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3040 break;
3041 case 0x06: /* push es */ 3847 case 0x06: /* push es */
3042 emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); 3848 rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
3043 break; 3849 break;
3044 case 0x07: /* pop es */ 3850 case 0x07: /* pop es */
3045 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); 3851 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
3046 break; 3852 break;
3047 case 0x08 ... 0x0d:
3048 or: /* or */
3049 emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
3050 break;
3051 case 0x0e: /* push cs */ 3853 case 0x0e: /* push cs */
3052 emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); 3854 rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
3053 break;
3054 case 0x10 ... 0x15:
3055 adc: /* adc */
3056 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
3057 break; 3855 break;
3058 case 0x16: /* push ss */ 3856 case 0x16: /* push ss */
3059 emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); 3857 rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
3060 break; 3858 break;
3061 case 0x17: /* pop ss */ 3859 case 0x17: /* pop ss */
3062 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); 3860 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
3063 break; 3861 break;
3064 case 0x18 ... 0x1d:
3065 sbb: /* sbb */
3066 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
3067 break;
3068 case 0x1e: /* push ds */ 3862 case 0x1e: /* push ds */
3069 emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); 3863 rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
3070 break; 3864 break;
3071 case 0x1f: /* pop ds */ 3865 case 0x1f: /* pop ds */
3072 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); 3866 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
3073 break; 3867 break;
3074 case 0x20 ... 0x25:
3075 and: /* and */
3076 emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
3077 break;
3078 case 0x28 ... 0x2d:
3079 sub: /* sub */
3080 emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
3081 break;
3082 case 0x30 ... 0x35:
3083 xor: /* xor */
3084 emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
3085 break;
3086 case 0x38 ... 0x3d:
3087 cmp: /* cmp */
3088 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3089 break;
3090 case 0x40 ... 0x47: /* inc r16/r32 */ 3868 case 0x40 ... 0x47: /* inc r16/r32 */
3091 emulate_1op("inc", c->dst, ctxt->eflags); 3869 emulate_1op("inc", c->dst, ctxt->eflags);
3092 break; 3870 break;
3093 case 0x48 ... 0x4f: /* dec r16/r32 */ 3871 case 0x48 ... 0x4f: /* dec r16/r32 */
3094 emulate_1op("dec", c->dst, ctxt->eflags); 3872 emulate_1op("dec", c->dst, ctxt->eflags);
3095 break; 3873 break;
3096 case 0x58 ... 0x5f: /* pop reg */
3097 pop_instruction:
3098 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
3099 break;
3100 case 0x60: /* pusha */
3101 rc = emulate_pusha(ctxt, ops);
3102 break;
3103 case 0x61: /* popa */
3104 rc = emulate_popa(ctxt, ops);
3105 break;
3106 case 0x63: /* movsxd */ 3874 case 0x63: /* movsxd */
3107 if (ctxt->mode != X86EMUL_MODE_PROT64) 3875 if (ctxt->mode != X86EMUL_MODE_PROT64)
3108 goto cannot_emulate; 3876 goto cannot_emulate;
@@ -3121,26 +3889,6 @@ special_insn:
3121 if (test_cc(c->b, ctxt->eflags)) 3889 if (test_cc(c->b, ctxt->eflags))
3122 jmp_rel(c, c->src.val); 3890 jmp_rel(c, c->src.val);
3123 break; 3891 break;
3124 case 0x80 ... 0x83: /* Grp1 */
3125 switch (c->modrm_reg) {
3126 case 0:
3127 goto add;
3128 case 1:
3129 goto or;
3130 case 2:
3131 goto adc;
3132 case 3:
3133 goto sbb;
3134 case 4:
3135 goto and;
3136 case 5:
3137 goto sub;
3138 case 6:
3139 goto xor;
3140 case 7:
3141 goto cmp;
3142 }
3143 break;
3144 case 0x84 ... 0x85: 3892 case 0x84 ... 0x85:
3145 test: 3893 test:
3146 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); 3894 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
@@ -3162,7 +3910,7 @@ special_insn:
3162 rc = emulate_ud(ctxt); 3910 rc = emulate_ud(ctxt);
3163 goto done; 3911 goto done;
3164 } 3912 }
3165 c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); 3913 c->dst.val = get_segment_selector(ctxt, c->modrm_reg);
3166 break; 3914 break;
3167 case 0x8d: /* lea r16/r32, m */ 3915 case 0x8d: /* lea r16/r32, m */
3168 c->dst.val = c->src.addr.mem.ea; 3916 c->dst.val = c->src.addr.mem.ea;
@@ -3187,7 +3935,7 @@ special_insn:
3187 break; 3935 break;
3188 } 3936 }
3189 case 0x8f: /* pop (sole member of Grp1a) */ 3937 case 0x8f: /* pop (sole member of Grp1a) */
3190 rc = emulate_grp1a(ctxt, ops); 3938 rc = em_grp1a(ctxt);
3191 break; 3939 break;
3192 case 0x90 ... 0x97: /* nop / xchg reg, rax */ 3940 case 0x90 ... 0x97: /* nop / xchg reg, rax */
3193 if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) 3941 if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
@@ -3200,31 +3948,17 @@ special_insn:
3200 case 8: c->dst.val = (s32)c->dst.val; break; 3948 case 8: c->dst.val = (s32)c->dst.val; break;
3201 } 3949 }
3202 break; 3950 break;
3203 case 0x9c: /* pushf */
3204 c->src.val = (unsigned long) ctxt->eflags;
3205 emulate_push(ctxt, ops);
3206 break;
3207 case 0x9d: /* popf */
3208 c->dst.type = OP_REG;
3209 c->dst.addr.reg = &ctxt->eflags;
3210 c->dst.bytes = c->op_bytes;
3211 rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
3212 break;
3213 case 0xa6 ... 0xa7: /* cmps */
3214 c->dst.type = OP_NONE; /* Disable writeback. */
3215 goto cmp;
3216 case 0xa8 ... 0xa9: /* test ax, imm */ 3951 case 0xa8 ... 0xa9: /* test ax, imm */
3217 goto test; 3952 goto test;
3218 case 0xae ... 0xaf: /* scas */
3219 goto cmp;
3220 case 0xc0 ... 0xc1: 3953 case 0xc0 ... 0xc1:
3221 emulate_grp2(ctxt); 3954 rc = em_grp2(ctxt);
3222 break; 3955 break;
3223 case 0xc3: /* ret */ 3956 case 0xc3: /* ret */
3224 c->dst.type = OP_REG; 3957 c->dst.type = OP_REG;
3225 c->dst.addr.reg = &c->eip; 3958 c->dst.addr.reg = &c->eip;
3226 c->dst.bytes = c->op_bytes; 3959 c->dst.bytes = c->op_bytes;
3227 goto pop_instruction; 3960 rc = em_pop(ctxt);
3961 break;
3228 case 0xc4: /* les */ 3962 case 0xc4: /* les */
3229 rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); 3963 rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
3230 break; 3964 break;
@@ -3252,11 +3986,11 @@ special_insn:
3252 rc = emulate_iret(ctxt, ops); 3986 rc = emulate_iret(ctxt, ops);
3253 break; 3987 break;
3254 case 0xd0 ... 0xd1: /* Grp2 */ 3988 case 0xd0 ... 0xd1: /* Grp2 */
3255 emulate_grp2(ctxt); 3989 rc = em_grp2(ctxt);
3256 break; 3990 break;
3257 case 0xd2 ... 0xd3: /* Grp2 */ 3991 case 0xd2 ... 0xd3: /* Grp2 */
3258 c->src.val = c->regs[VCPU_REGS_RCX]; 3992 c->src.val = c->regs[VCPU_REGS_RCX];
3259 emulate_grp2(ctxt); 3993 rc = em_grp2(ctxt);
3260 break; 3994 break;
3261 case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ 3995 case 0xe0 ... 0xe2: /* loop/loopz/loopnz */
3262 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); 3996 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
@@ -3278,23 +4012,14 @@ special_insn:
3278 long int rel = c->src.val; 4012 long int rel = c->src.val;
3279 c->src.val = (unsigned long) c->eip; 4013 c->src.val = (unsigned long) c->eip;
3280 jmp_rel(c, rel); 4014 jmp_rel(c, rel);
3281 emulate_push(ctxt, ops); 4015 rc = em_push(ctxt);
3282 break; 4016 break;
3283 } 4017 }
3284 case 0xe9: /* jmp rel */ 4018 case 0xe9: /* jmp rel */
3285 goto jmp; 4019 goto jmp;
3286 case 0xea: { /* jmp far */ 4020 case 0xea: /* jmp far */
3287 unsigned short sel; 4021 rc = em_jmp_far(ctxt);
3288 jump_far:
3289 memcpy(&sel, c->src.valptr + c->op_bytes, 2);
3290
3291 if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
3292 goto done;
3293
3294 c->eip = 0;
3295 memcpy(&c->eip, c->src.valptr, c->op_bytes);
3296 break; 4022 break;
3297 }
3298 case 0xeb: 4023 case 0xeb:
3299 jmp: /* jmp rel short */ 4024 jmp: /* jmp rel short */
3300 jmp_rel(c, c->src.val); 4025 jmp_rel(c, c->src.val);
@@ -3304,11 +4029,6 @@ special_insn:
3304 case 0xed: /* in (e/r)ax,dx */ 4029 case 0xed: /* in (e/r)ax,dx */
3305 c->src.val = c->regs[VCPU_REGS_RDX]; 4030 c->src.val = c->regs[VCPU_REGS_RDX];
3306 do_io_in: 4031 do_io_in:
3307 c->dst.bytes = min(c->dst.bytes, 4u);
3308 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
3309 rc = emulate_gp(ctxt, 0);
3310 goto done;
3311 }
3312 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, 4032 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
3313 &c->dst.val)) 4033 &c->dst.val))
3314 goto done; /* IO is needed */ 4034 goto done; /* IO is needed */
@@ -3317,25 +4037,19 @@ special_insn:
3317 case 0xef: /* out dx,(e/r)ax */ 4037 case 0xef: /* out dx,(e/r)ax */
3318 c->dst.val = c->regs[VCPU_REGS_RDX]; 4038 c->dst.val = c->regs[VCPU_REGS_RDX];
3319 do_io_out: 4039 do_io_out:
3320 c->src.bytes = min(c->src.bytes, 4u); 4040 ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val,
3321 if (!emulator_io_permited(ctxt, ops, c->dst.val, 4041 &c->src.val, 1);
3322 c->src.bytes)) {
3323 rc = emulate_gp(ctxt, 0);
3324 goto done;
3325 }
3326 ops->pio_out_emulated(c->src.bytes, c->dst.val,
3327 &c->src.val, 1, ctxt->vcpu);
3328 c->dst.type = OP_NONE; /* Disable writeback. */ 4042 c->dst.type = OP_NONE; /* Disable writeback. */
3329 break; 4043 break;
3330 case 0xf4: /* hlt */ 4044 case 0xf4: /* hlt */
3331 ctxt->vcpu->arch.halt_request = 1; 4045 ctxt->ops->halt(ctxt);
3332 break; 4046 break;
3333 case 0xf5: /* cmc */ 4047 case 0xf5: /* cmc */
3334 /* complement carry flag from eflags reg */ 4048 /* complement carry flag from eflags reg */
3335 ctxt->eflags ^= EFLG_CF; 4049 ctxt->eflags ^= EFLG_CF;
3336 break; 4050 break;
3337 case 0xf6 ... 0xf7: /* Grp3 */ 4051 case 0xf6 ... 0xf7: /* Grp3 */
3338 rc = emulate_grp3(ctxt, ops); 4052 rc = em_grp3(ctxt);
3339 break; 4053 break;
3340 case 0xf8: /* clc */ 4054 case 0xf8: /* clc */
3341 ctxt->eflags &= ~EFLG_CF; 4055 ctxt->eflags &= ~EFLG_CF;
@@ -3366,13 +4080,11 @@ special_insn:
3366 ctxt->eflags |= EFLG_DF; 4080 ctxt->eflags |= EFLG_DF;
3367 break; 4081 break;
3368 case 0xfe: /* Grp4 */ 4082 case 0xfe: /* Grp4 */
3369 grp45: 4083 rc = em_grp45(ctxt);
3370 rc = emulate_grp45(ctxt, ops);
3371 break; 4084 break;
3372 case 0xff: /* Grp5 */ 4085 case 0xff: /* Grp5 */
3373 if (c->modrm_reg == 5) 4086 rc = em_grp45(ctxt);
3374 goto jump_far; 4087 break;
3375 goto grp45;
3376 default: 4088 default:
3377 goto cannot_emulate; 4089 goto cannot_emulate;
3378 } 4090 }
@@ -3381,7 +4093,7 @@ special_insn:
3381 goto done; 4093 goto done;
3382 4094
3383writeback: 4095writeback:
3384 rc = writeback(ctxt, ops); 4096 rc = writeback(ctxt);
3385 if (rc != X86EMUL_CONTINUE) 4097 if (rc != X86EMUL_CONTINUE)
3386 goto done; 4098 goto done;
3387 4099
@@ -3392,7 +4104,7 @@ writeback:
3392 c->dst.type = saved_dst_type; 4104 c->dst.type = saved_dst_type;
3393 4105
3394 if ((c->d & SrcMask) == SrcSI) 4106 if ((c->d & SrcMask) == SrcSI)
3395 string_addr_inc(ctxt, seg_override(ctxt, ops, c), 4107 string_addr_inc(ctxt, seg_override(ctxt, c),
3396 VCPU_REGS_RSI, &c->src); 4108 VCPU_REGS_RSI, &c->src);
3397 4109
3398 if ((c->d & DstMask) == DstDI) 4110 if ((c->d & DstMask) == DstDI)
@@ -3427,115 +4139,34 @@ writeback:
3427done: 4139done:
3428 if (rc == X86EMUL_PROPAGATE_FAULT) 4140 if (rc == X86EMUL_PROPAGATE_FAULT)
3429 ctxt->have_exception = true; 4141 ctxt->have_exception = true;
4142 if (rc == X86EMUL_INTERCEPTED)
4143 return EMULATION_INTERCEPTED;
4144
3430 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; 4145 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3431 4146
3432twobyte_insn: 4147twobyte_insn:
3433 switch (c->b) { 4148 switch (c->b) {
3434 case 0x01: /* lgdt, lidt, lmsw */
3435 switch (c->modrm_reg) {
3436 u16 size;
3437 unsigned long address;
3438
3439 case 0: /* vmcall */
3440 if (c->modrm_mod != 3 || c->modrm_rm != 1)
3441 goto cannot_emulate;
3442
3443 rc = kvm_fix_hypercall(ctxt->vcpu);
3444 if (rc != X86EMUL_CONTINUE)
3445 goto done;
3446
3447 /* Let the processor re-execute the fixed hypercall */
3448 c->eip = ctxt->eip;
3449 /* Disable writeback. */
3450 c->dst.type = OP_NONE;
3451 break;
3452 case 2: /* lgdt */
3453 rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3454 &size, &address, c->op_bytes);
3455 if (rc != X86EMUL_CONTINUE)
3456 goto done;
3457 realmode_lgdt(ctxt->vcpu, size, address);
3458 /* Disable writeback. */
3459 c->dst.type = OP_NONE;
3460 break;
3461 case 3: /* lidt/vmmcall */
3462 if (c->modrm_mod == 3) {
3463 switch (c->modrm_rm) {
3464 case 1:
3465 rc = kvm_fix_hypercall(ctxt->vcpu);
3466 break;
3467 default:
3468 goto cannot_emulate;
3469 }
3470 } else {
3471 rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3472 &size, &address,
3473 c->op_bytes);
3474 if (rc != X86EMUL_CONTINUE)
3475 goto done;
3476 realmode_lidt(ctxt->vcpu, size, address);
3477 }
3478 /* Disable writeback. */
3479 c->dst.type = OP_NONE;
3480 break;
3481 case 4: /* smsw */
3482 c->dst.bytes = 2;
3483 c->dst.val = ops->get_cr(0, ctxt->vcpu);
3484 break;
3485 case 6: /* lmsw */
3486 ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
3487 (c->src.val & 0x0f), ctxt->vcpu);
3488 c->dst.type = OP_NONE;
3489 break;
3490 case 5: /* not defined */
3491 emulate_ud(ctxt);
3492 rc = X86EMUL_PROPAGATE_FAULT;
3493 goto done;
3494 case 7: /* invlpg*/
3495 emulate_invlpg(ctxt->vcpu,
3496 linear(ctxt, c->src.addr.mem));
3497 /* Disable writeback. */
3498 c->dst.type = OP_NONE;
3499 break;
3500 default:
3501 goto cannot_emulate;
3502 }
3503 break;
3504 case 0x05: /* syscall */ 4149 case 0x05: /* syscall */
3505 rc = emulate_syscall(ctxt, ops); 4150 rc = emulate_syscall(ctxt, ops);
3506 break; 4151 break;
3507 case 0x06: 4152 case 0x06:
3508 emulate_clts(ctxt->vcpu); 4153 rc = em_clts(ctxt);
3509 break; 4154 break;
3510 case 0x09: /* wbinvd */ 4155 case 0x09: /* wbinvd */
3511 kvm_emulate_wbinvd(ctxt->vcpu); 4156 (ctxt->ops->wbinvd)(ctxt);
3512 break; 4157 break;
3513 case 0x08: /* invd */ 4158 case 0x08: /* invd */
3514 case 0x0d: /* GrpP (prefetch) */ 4159 case 0x0d: /* GrpP (prefetch) */
3515 case 0x18: /* Grp16 (prefetch/nop) */ 4160 case 0x18: /* Grp16 (prefetch/nop) */
3516 break; 4161 break;
3517 case 0x20: /* mov cr, reg */ 4162 case 0x20: /* mov cr, reg */
3518 switch (c->modrm_reg) { 4163 c->dst.val = ops->get_cr(ctxt, c->modrm_reg);
3519 case 1:
3520 case 5 ... 7:
3521 case 9 ... 15:
3522 emulate_ud(ctxt);
3523 rc = X86EMUL_PROPAGATE_FAULT;
3524 goto done;
3525 }
3526 c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3527 break; 4164 break;
3528 case 0x21: /* mov from dr to reg */ 4165 case 0x21: /* mov from dr to reg */
3529 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 4166 ops->get_dr(ctxt, c->modrm_reg, &c->dst.val);
3530 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3531 emulate_ud(ctxt);
3532 rc = X86EMUL_PROPAGATE_FAULT;
3533 goto done;
3534 }
3535 ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
3536 break; 4167 break;
3537 case 0x22: /* mov reg, cr */ 4168 case 0x22: /* mov reg, cr */
3538 if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { 4169 if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) {
3539 emulate_gp(ctxt, 0); 4170 emulate_gp(ctxt, 0);
3540 rc = X86EMUL_PROPAGATE_FAULT; 4171 rc = X86EMUL_PROPAGATE_FAULT;
3541 goto done; 4172 goto done;
@@ -3543,16 +4174,9 @@ twobyte_insn:
3543 c->dst.type = OP_NONE; 4174 c->dst.type = OP_NONE;
3544 break; 4175 break;
3545 case 0x23: /* mov from reg to dr */ 4176 case 0x23: /* mov from reg to dr */
3546 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 4177 if (ops->set_dr(ctxt, c->modrm_reg, c->src.val &
3547 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3548 emulate_ud(ctxt);
3549 rc = X86EMUL_PROPAGATE_FAULT;
3550 goto done;
3551 }
3552
3553 if (ops->set_dr(c->modrm_reg, c->src.val &
3554 ((ctxt->mode == X86EMUL_MODE_PROT64) ? 4178 ((ctxt->mode == X86EMUL_MODE_PROT64) ?
3555 ~0ULL : ~0U), ctxt->vcpu) < 0) { 4179 ~0ULL : ~0U)) < 0) {
3556 /* #UD condition is already handled by the code above */ 4180 /* #UD condition is already handled by the code above */
3557 emulate_gp(ctxt, 0); 4181 emulate_gp(ctxt, 0);
3558 rc = X86EMUL_PROPAGATE_FAULT; 4182 rc = X86EMUL_PROPAGATE_FAULT;
@@ -3565,7 +4189,7 @@ twobyte_insn:
3565 /* wrmsr */ 4189 /* wrmsr */
3566 msr_data = (u32)c->regs[VCPU_REGS_RAX] 4190 msr_data = (u32)c->regs[VCPU_REGS_RAX]
3567 | ((u64)c->regs[VCPU_REGS_RDX] << 32); 4191 | ((u64)c->regs[VCPU_REGS_RDX] << 32);
3568 if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { 4192 if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) {
3569 emulate_gp(ctxt, 0); 4193 emulate_gp(ctxt, 0);
3570 rc = X86EMUL_PROPAGATE_FAULT; 4194 rc = X86EMUL_PROPAGATE_FAULT;
3571 goto done; 4195 goto done;
@@ -3574,7 +4198,7 @@ twobyte_insn:
3574 break; 4198 break;
3575 case 0x32: 4199 case 0x32:
3576 /* rdmsr */ 4200 /* rdmsr */
3577 if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { 4201 if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) {
3578 emulate_gp(ctxt, 0); 4202 emulate_gp(ctxt, 0);
3579 rc = X86EMUL_PROPAGATE_FAULT; 4203 rc = X86EMUL_PROPAGATE_FAULT;
3580 goto done; 4204 goto done;
@@ -3603,7 +4227,7 @@ twobyte_insn:
3603 c->dst.val = test_cc(c->b, ctxt->eflags); 4227 c->dst.val = test_cc(c->b, ctxt->eflags);
3604 break; 4228 break;
3605 case 0xa0: /* push fs */ 4229 case 0xa0: /* push fs */
3606 emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); 4230 rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
3607 break; 4231 break;
3608 case 0xa1: /* pop fs */ 4232 case 0xa1: /* pop fs */
3609 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); 4233 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
@@ -3620,7 +4244,7 @@ twobyte_insn:
3620 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); 4244 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3621 break; 4245 break;
3622 case 0xa8: /* push gs */ 4246 case 0xa8: /* push gs */
3623 emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); 4247 rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
3624 break; 4248 break;
3625 case 0xa9: /* pop gs */ 4249 case 0xa9: /* pop gs */
3626 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); 4250 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
@@ -3727,7 +4351,7 @@ twobyte_insn:
3727 (u64) c->src.val; 4351 (u64) c->src.val;
3728 break; 4352 break;
3729 case 0xc7: /* Grp9 (cmpxchg8b) */ 4353 case 0xc7: /* Grp9 (cmpxchg8b) */
3730 rc = emulate_grp9(ctxt, ops); 4354 rc = em_grp9(ctxt);
3731 break; 4355 break;
3732 default: 4356 default:
3733 goto cannot_emulate; 4357 goto cannot_emulate;
@@ -3739,5 +4363,5 @@ twobyte_insn:
3739 goto writeback; 4363 goto writeback;
3740 4364
3741cannot_emulate: 4365cannot_emulate:
3742 return -1; 4366 return EMULATION_FAILED;
3743} 4367}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 46d08ca0b48f..51a97426e791 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -33,7 +33,6 @@ struct kvm_kpit_state {
33}; 33};
34 34
35struct kvm_pit { 35struct kvm_pit {
36 unsigned long base_addresss;
37 struct kvm_io_device dev; 36 struct kvm_io_device dev;
38 struct kvm_io_device speaker_dev; 37 struct kvm_io_device speaker_dev;
39 struct kvm *kvm; 38 struct kvm *kvm;
@@ -51,7 +50,6 @@ struct kvm_pit {
51#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 50#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
52#define KVM_PIT_CHANNEL_MASK 0x3 51#define KVM_PIT_CHANNEL_MASK 0x3
53 52
54void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
55void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); 53void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
56struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); 54struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
57void kvm_free_pit(struct kvm *kvm); 55void kvm_free_pit(struct kvm *kvm);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ba910d149410..53e2d084bffb 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm);
75void kvm_destroy_pic(struct kvm *kvm); 75void kvm_destroy_pic(struct kvm *kvm);
76int kvm_pic_read_irq(struct kvm *kvm); 76int kvm_pic_read_irq(struct kvm *kvm);
77void kvm_pic_update_irq(struct kvm_pic *s); 77void kvm_pic_update_irq(struct kvm_pic *s);
78void kvm_pic_clear_isr_ack(struct kvm *kvm);
79 78
80static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) 79static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
81{ 80{
@@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
100void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 99void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
101void __kvm_migrate_timers(struct kvm_vcpu *vcpu); 100void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
102 101
103int pit_has_pending_timer(struct kvm_vcpu *vcpu);
104int apic_has_pending_timer(struct kvm_vcpu *vcpu); 102int apic_has_pending_timer(struct kvm_vcpu *vcpu);
105 103
106#endif 104#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 22fae7593ee7..28418054b880 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
1206 1206
1207static void nonpaging_update_pte(struct kvm_vcpu *vcpu, 1207static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
1208 struct kvm_mmu_page *sp, u64 *spte, 1208 struct kvm_mmu_page *sp, u64 *spte,
1209 const void *pte, unsigned long mmu_seq) 1209 const void *pte)
1210{ 1210{
1211 WARN_ON(1); 1211 WARN_ON(1);
1212} 1212}
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
3163} 3163}
3164 3164
3165static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, 3165static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
3166 struct kvm_mmu_page *sp, 3166 struct kvm_mmu_page *sp, u64 *spte,
3167 u64 *spte, 3167 const void *new)
3168 const void *new, unsigned long mmu_seq)
3169{ 3168{
3170 if (sp->role.level != PT_PAGE_TABLE_LEVEL) { 3169 if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
3171 ++vcpu->kvm->stat.mmu_pde_zapped; 3170 ++vcpu->kvm->stat.mmu_pde_zapped;
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
3173 } 3172 }
3174 3173
3175 ++vcpu->kvm->stat.mmu_pte_updated; 3174 ++vcpu->kvm->stat.mmu_pte_updated;
3176 vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); 3175 vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
3177} 3176}
3178 3177
3179static bool need_remote_flush(u64 old, u64 new) 3178static bool need_remote_flush(u64 old, u64 new)
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3229 struct kvm_mmu_page *sp; 3228 struct kvm_mmu_page *sp;
3230 struct hlist_node *node; 3229 struct hlist_node *node;
3231 LIST_HEAD(invalid_list); 3230 LIST_HEAD(invalid_list);
3232 unsigned long mmu_seq;
3233 u64 entry, gentry, *spte; 3231 u64 entry, gentry, *spte;
3234 unsigned pte_size, page_offset, misaligned, quadrant, offset; 3232 unsigned pte_size, page_offset, misaligned, quadrant, offset;
3235 int level, npte, invlpg_counter, r, flooded = 0; 3233 int level, npte, invlpg_counter, r, flooded = 0;
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3271 break; 3269 break;
3272 } 3270 }
3273 3271
3274 mmu_seq = vcpu->kvm->mmu_notifier_seq;
3275 smp_rmb();
3276
3277 spin_lock(&vcpu->kvm->mmu_lock); 3272 spin_lock(&vcpu->kvm->mmu_lock);
3278 if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) 3273 if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
3279 gentry = 0; 3274 gentry = 0;
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3345 if (gentry && 3340 if (gentry &&
3346 !((sp->role.word ^ vcpu->arch.mmu.base_role.word) 3341 !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
3347 & mask.word)) 3342 & mask.word))
3348 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, 3343 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
3349 mmu_seq);
3350 if (!remote_flush && need_remote_flush(entry, *spte)) 3344 if (!remote_flush && need_remote_flush(entry, *spte))
3351 remote_flush = true; 3345 remote_flush = true;
3352 ++spte; 3346 ++spte;
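The mmu.c hunks above strip the mmu_seq plumbing out of the pte-update path. For reference, the removed lines implemented the usual mmu-notifier race check: sample kvm->mmu_notifier_seq before the lockless work and let mmu_notifier_retry() abort the commit if an invalidation ran in between. A toy sketch of that publish/retry pattern, with illustrative names and plain C11 atomics standing in for the kernel's primitives:

#include <stdio.h>
#include <stdatomic.h>

static atomic_ulong notifier_seq;	/* bumped by every invalidation */

static void invalidate_range(void)	/* the mmu-notifier side */
{
	atomic_fetch_add(&notifier_seq, 1);
}

static unsigned long fault_begin(void)	/* sample before lockless work */
{
	return atomic_load(&notifier_seq);
}

static int fault_commit(unsigned long seq)	/* under mmu_lock */
{
	if (seq != atomic_load(&notifier_seq))
		return -1;	/* raced with an invalidation: retry */
	/* ... install the spte ... */
	return 0;
}

int main(void)
{
	unsigned long seq;

	seq = fault_begin();
	invalidate_range();			/* notifier fires mid-fault */
	printf("commit: %d\n", fault_commit(seq));	/* -1 */

	seq = fault_begin();
	printf("commit: %d\n", fault_commit(seq));	/* 0 */
	return 0;
}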
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index c6397795d865..6c4dc010c4cb 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
78 return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; 78 return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
79} 79}
80 80
81static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, 81static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
82 gfn_t table_gfn, unsigned index, 82 pt_element_t __user *ptep_user, unsigned index,
83 pt_element_t orig_pte, pt_element_t new_pte) 83 pt_element_t orig_pte, pt_element_t new_pte)
84{ 84{
85 int npages;
85 pt_element_t ret; 86 pt_element_t ret;
86 pt_element_t *table; 87 pt_element_t *table;
87 struct page *page; 88 struct page *page;
88 89
89 page = gfn_to_page(kvm, table_gfn); 90 npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
91 /* Check if the user is doing something meaningless. */
92 if (unlikely(npages != 1))
93 return -EFAULT;
90 94
91 table = kmap_atomic(page, KM_USER0); 95 table = kmap_atomic(page, KM_USER0);
92 ret = CMPXCHG(&table[index], orig_pte, new_pte); 96 ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
117 gva_t addr, u32 access) 121 gva_t addr, u32 access)
118{ 122{
119 pt_element_t pte; 123 pt_element_t pte;
124 pt_element_t __user *ptep_user;
120 gfn_t table_gfn; 125 gfn_t table_gfn;
121 unsigned index, pt_access, uninitialized_var(pte_access); 126 unsigned index, pt_access, uninitialized_var(pte_access);
122 gpa_t pte_gpa; 127 gpa_t pte_gpa;
@@ -152,6 +157,9 @@ walk:
152 pt_access = ACC_ALL; 157 pt_access = ACC_ALL;
153 158
154 for (;;) { 159 for (;;) {
160 gfn_t real_gfn;
161 unsigned long host_addr;
162
155 index = PT_INDEX(addr, walker->level); 163 index = PT_INDEX(addr, walker->level);
156 164
157 table_gfn = gpte_to_gfn(pte); 165 table_gfn = gpte_to_gfn(pte);
@@ -160,43 +168,64 @@ walk:
160 walker->table_gfn[walker->level - 1] = table_gfn; 168 walker->table_gfn[walker->level - 1] = table_gfn;
161 walker->pte_gpa[walker->level - 1] = pte_gpa; 169 walker->pte_gpa[walker->level - 1] = pte_gpa;
162 170
163 if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, 171 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
164 offset, sizeof(pte), 172 PFERR_USER_MASK|PFERR_WRITE_MASK);
165 PFERR_USER_MASK|PFERR_WRITE_MASK)) { 173 if (unlikely(real_gfn == UNMAPPED_GVA)) {
174 present = false;
175 break;
176 }
177 real_gfn = gpa_to_gfn(real_gfn);
178
179 host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
180 if (unlikely(kvm_is_error_hva(host_addr))) {
181 present = false;
182 break;
183 }
184
185 ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
186 if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
166 present = false; 187 present = false;
167 break; 188 break;
168 } 189 }
169 190
170 trace_kvm_mmu_paging_element(pte, walker->level); 191 trace_kvm_mmu_paging_element(pte, walker->level);
171 192
172 if (!is_present_gpte(pte)) { 193 if (unlikely(!is_present_gpte(pte))) {
173 present = false; 194 present = false;
174 break; 195 break;
175 } 196 }
176 197
177 if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { 198 if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
199 walker->level))) {
178 rsvd_fault = true; 200 rsvd_fault = true;
179 break; 201 break;
180 } 202 }
181 203
182 if (write_fault && !is_writable_pte(pte)) 204 if (unlikely(write_fault && !is_writable_pte(pte)
183 if (user_fault || is_write_protection(vcpu)) 205 && (user_fault || is_write_protection(vcpu))))
184 eperm = true; 206 eperm = true;
185 207
186 if (user_fault && !(pte & PT_USER_MASK)) 208 if (unlikely(user_fault && !(pte & PT_USER_MASK)))
187 eperm = true; 209 eperm = true;
188 210
189#if PTTYPE == 64 211#if PTTYPE == 64
190 if (fetch_fault && (pte & PT64_NX_MASK)) 212 if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
191 eperm = true; 213 eperm = true;
192#endif 214#endif
193 215
194 if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { 216 if (!eperm && !rsvd_fault
217 && unlikely(!(pte & PT_ACCESSED_MASK))) {
218 int ret;
195 trace_kvm_mmu_set_accessed_bit(table_gfn, index, 219 trace_kvm_mmu_set_accessed_bit(table_gfn, index,
196 sizeof(pte)); 220 sizeof(pte));
197 if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, 221 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
198 index, pte, pte|PT_ACCESSED_MASK)) 222 pte, pte|PT_ACCESSED_MASK);
223 if (unlikely(ret < 0)) {
224 present = false;
225 break;
226 } else if (ret)
199 goto walk; 227 goto walk;
228
200 mark_page_dirty(vcpu->kvm, table_gfn); 229 mark_page_dirty(vcpu->kvm, table_gfn);
201 pte |= PT_ACCESSED_MASK; 230 pte |= PT_ACCESSED_MASK;
202 } 231 }
@@ -241,17 +270,21 @@ walk:
241 --walker->level; 270 --walker->level;
242 } 271 }
243 272
244 if (!present || eperm || rsvd_fault) 273 if (unlikely(!present || eperm || rsvd_fault))
245 goto error; 274 goto error;
246 275
247 if (write_fault && !is_dirty_gpte(pte)) { 276 if (write_fault && unlikely(!is_dirty_gpte(pte))) {
248 bool ret; 277 int ret;
249 278
250 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 279 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
251 ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, 280 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
252 pte|PT_DIRTY_MASK); 281 pte, pte|PT_DIRTY_MASK);
253 if (ret) 282 if (unlikely(ret < 0)) {
283 present = false;
284 goto error;
285 } else if (ret)
254 goto walk; 286 goto walk;
287
255 mark_page_dirty(vcpu->kvm, table_gfn); 288 mark_page_dirty(vcpu->kvm, table_gfn);
256 pte |= PT_DIRTY_MASK; 289 pte |= PT_DIRTY_MASK;
257 walker->ptes[walker->level - 1] = pte; 290 walker->ptes[walker->level - 1] = pte;
@@ -325,7 +358,7 @@ no_present:
325} 358}
326 359
327static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 360static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
328 u64 *spte, const void *pte, unsigned long mmu_seq) 361 u64 *spte, const void *pte)
329{ 362{
330 pt_element_t gpte; 363 pt_element_t gpte;
331 unsigned pte_access; 364 unsigned pte_access;
@@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
342 kvm_release_pfn_clean(pfn); 375 kvm_release_pfn_clean(pfn);
343 return; 376 return;
344 } 377 }
345 if (mmu_notifier_retry(vcpu, mmu_seq))
346 return;
347 378
348 /* 379 /*
349 * we call mmu_set_spte() with host_writable = true because that 380 * we call mmu_set_spte() with host_writable = true because that
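Two related changes in the paging_tmpl.h hunks above: walk_addr_generic now resolves the guest page-table gfn to a host virtual address itself (translate_gpa + gfn_to_hva + __copy_from_user), and cmpxchg_gpte pins the backing page with get_user_pages_fast and cmpxchgs the entry in place, so accessed/dirty bits are set atomically even if the guest modifies the pte concurrently; a pte that changed under the walker forces a re-walk. A stand-alone approximation of the cmpxchg step -- a plain array stands in for the pinned, kmapped page, and __sync_val_compare_and_swap is a GCC builtin:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t pt_element_t;

#define PT_PRESENT_MASK  (1ull << 0)
#define PT_ACCESSED_MASK (1ull << 5)

/* returns 0 on success, nonzero if the pte changed under us and the
 * guest-table walk must be restarted */
static int gpte_set_accessed(pt_element_t *table, unsigned int index,
			     pt_element_t orig)
{
	pt_element_t seen;

	seen = __sync_val_compare_and_swap(&table[index], orig,
					   orig | PT_ACCESSED_MASK);
	return seen != orig;
}

int main(void)
{
	pt_element_t pt[512] = { [3] = 0x1000 | PT_PRESENT_MASK };

	if (gpte_set_accessed(pt, 3, pt[3]))
		puts("raced, retry walk");
	else
		printf("pte now %#llx\n", (unsigned long long)pt[3]);
	return 0;
}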
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6bb15d583e47..506e4fe23adc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -63,6 +63,10 @@ MODULE_LICENSE("GPL");
63 63
64#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 64#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
65 65
66#define TSC_RATIO_RSVD 0xffffff0000000000ULL
67#define TSC_RATIO_MIN 0x0000000000000001ULL
68#define TSC_RATIO_MAX 0x000000ffffffffffULL
69
66static bool erratum_383_found __read_mostly; 70static bool erratum_383_found __read_mostly;
67 71
68static const u32 host_save_user_msrs[] = { 72static const u32 host_save_user_msrs[] = {
@@ -93,14 +97,6 @@ struct nested_state {
93 /* A VMEXIT is required but not yet emulated */ 97 /* A VMEXIT is required but not yet emulated */
94 bool exit_required; 98 bool exit_required;
95 99
96 /*
97 * If we vmexit during an instruction emulation we need this to restore
98 * the l1 guest rip after the emulation
99 */
100 unsigned long vmexit_rip;
101 unsigned long vmexit_rsp;
102 unsigned long vmexit_rax;
103
104 /* cache for intercepts of the guest */ 100 /* cache for intercepts of the guest */
105 u32 intercept_cr; 101 u32 intercept_cr;
106 u32 intercept_dr; 102 u32 intercept_dr;
@@ -144,8 +140,13 @@ struct vcpu_svm {
144 unsigned int3_injected; 140 unsigned int3_injected;
145 unsigned long int3_rip; 141 unsigned long int3_rip;
146 u32 apf_reason; 142 u32 apf_reason;
143
144 u64 tsc_ratio;
147}; 145};
148 146
147static DEFINE_PER_CPU(u64, current_tsc_ratio);
148#define TSC_RATIO_DEFAULT 0x0100000000ULL
149
149#define MSR_INVALID 0xffffffffU 150#define MSR_INVALID 0xffffffffU
150 151
151static struct svm_direct_access_msrs { 152static struct svm_direct_access_msrs {
@@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
190static int nested_svm_vmexit(struct vcpu_svm *svm); 191static int nested_svm_vmexit(struct vcpu_svm *svm);
191static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 192static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
192 bool has_error_code, u32 error_code); 193 bool has_error_code, u32 error_code);
194static u64 __scale_tsc(u64 ratio, u64 tsc);
193 195
194enum { 196enum {
195 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, 197 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -376,7 +378,6 @@ struct svm_cpu_data {
376}; 378};
377 379
378static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); 380static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
379static uint32_t svm_features;
380 381
381struct svm_init_data { 382struct svm_init_data {
382 int cpu; 383 int cpu;
@@ -569,6 +570,10 @@ static int has_svm(void)
569 570
570static void svm_hardware_disable(void *garbage) 571static void svm_hardware_disable(void *garbage)
571{ 572{
573 /* Make sure we clean up behind us */
574 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
575 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
576
572 cpu_svm_disable(); 577 cpu_svm_disable();
573} 578}
574 579
@@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage)
610 615
611 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); 616 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
612 617
618 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
619 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
620 __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
621 }
622
613 svm_init_erratum_383(); 623 svm_init_erratum_383();
614 624
615 return 0; 625 return 0;
@@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void)
791 if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) 801 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
792 kvm_enable_efer_bits(EFER_FFXSR); 802 kvm_enable_efer_bits(EFER_FFXSR);
793 803
804 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
805 u64 max;
806
807 kvm_has_tsc_control = true;
808
809 /*
810 * Make sure the user can only configure tsc_khz values that
811 * fit into a signed integer.
 812 * A min value is not calculated because it will always
813 * be 1 on all machines and a value of 0 is used to disable
814 * tsc-scaling for the vcpu.
815 */
816 max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
817
818 kvm_max_guest_tsc_khz = max;
819 }
820
794 if (nested) { 821 if (nested) {
795 printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); 822 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
796 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); 823 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void)
802 goto err; 829 goto err;
803 } 830 }
804 831
805 svm_features = cpuid_edx(SVM_CPUID_FUNC);
806
807 if (!boot_cpu_has(X86_FEATURE_NPT)) 832 if (!boot_cpu_has(X86_FEATURE_NPT))
808 npt_enabled = false; 833 npt_enabled = false;
809 834
@@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
854 seg->base = 0; 879 seg->base = 0;
855} 880}
856 881
882static u64 __scale_tsc(u64 ratio, u64 tsc)
883{
884 u64 mult, frac, _tsc;
885
886 mult = ratio >> 32;
887 frac = ratio & ((1ULL << 32) - 1);
888
889 _tsc = tsc;
890 _tsc *= mult;
891 _tsc += (tsc >> 32) * frac;
892 _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
893
894 return _tsc;
895}
896
897static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
898{
899 struct vcpu_svm *svm = to_svm(vcpu);
900 u64 _tsc = tsc;
901
902 if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
903 _tsc = __scale_tsc(svm->tsc_ratio, tsc);
904
905 return _tsc;
906}
907
908static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
909{
910 struct vcpu_svm *svm = to_svm(vcpu);
911 u64 ratio;
912 u64 khz;
913
914 /* TSC scaling supported? */
915 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
916 return;
917
918 /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
919 if (user_tsc_khz == 0) {
920 vcpu->arch.virtual_tsc_khz = 0;
921 svm->tsc_ratio = TSC_RATIO_DEFAULT;
922 return;
923 }
924
925 khz = user_tsc_khz;
926
927 /* TSC scaling required - calculate ratio */
928 ratio = khz << 32;
929 do_div(ratio, tsc_khz);
930
931 if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
932 WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
933 user_tsc_khz);
934 return;
935 }
936 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
937 svm->tsc_ratio = ratio;
938}
939
857static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 940static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
858{ 941{
859 struct vcpu_svm *svm = to_svm(vcpu); 942 struct vcpu_svm *svm = to_svm(vcpu);
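__scale_tsc above multiplies a 64-bit tsc by a fixed-point ratio whose integer part sits in the high 32 bits and whose fraction in the low 32 bits (guest_tsc = host_tsc * ratio >> 32), split into three 64-bit partial products so no intermediate needs 128 bits; svm_set_tsc_khz builds the ratio as (user_tsc_khz << 32) / tsc_khz. A stand-alone check of that arithmetic -- the __int128 reference is a GCC extension, used here only for verification:

#include <stdio.h>
#include <stdint.h>

static uint64_t scale_tsc(uint64_t ratio, uint64_t tsc)
{
	uint64_t mult = ratio >> 32;		/* integer part */
	uint64_t frac = ratio & 0xffffffffu;	/* fractional part */
	uint64_t res;

	res  = tsc * mult;			   /* whole multiples */
	res += (tsc >> 32) * frac;		   /* high half * fraction */
	res += ((tsc & 0xffffffffu) * frac) >> 32; /* low half * fraction */
	return res;
}

int main(void)
{
	/* guest asks for 1500000 kHz on a 3000000 kHz host: ratio 0.5 */
	uint64_t ratio = ((uint64_t)1500000 << 32) / 3000000;
	uint64_t tsc = 0x0123456789abcdefull;

	printf("scaled = %#llx\n", (unsigned long long)scale_tsc(ratio, tsc));
	printf("check  = %#llx\n", (unsigned long long)
	       (uint64_t)(((unsigned __int128)tsc * ratio) >> 32));
	return 0;
}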
@@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
880 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 963 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
881} 964}
882 965
966static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
967{
968 u64 tsc;
969
970 tsc = svm_scale_tsc(vcpu, native_read_tsc());
971
972 return target_tsc - tsc;
973}
974
883static void init_vmcb(struct vcpu_svm *svm) 975static void init_vmcb(struct vcpu_svm *svm)
884{ 976{
885 struct vmcb_control_area *control = &svm->vmcb->control; 977 struct vmcb_control_area *control = &svm->vmcb->control;
@@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm)
975 svm_set_efer(&svm->vcpu, 0); 1067 svm_set_efer(&svm->vcpu, 0);
976 save->dr6 = 0xffff0ff0; 1068 save->dr6 = 0xffff0ff0;
977 save->dr7 = 0x400; 1069 save->dr7 = 0x400;
978 save->rflags = 2; 1070 kvm_set_rflags(&svm->vcpu, 2);
979 save->rip = 0x0000fff0; 1071 save->rip = 0x0000fff0;
980 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; 1072 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
981 1073
@@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1048 goto out; 1140 goto out;
1049 } 1141 }
1050 1142
1143 svm->tsc_ratio = TSC_RATIO_DEFAULT;
1144
1051 err = kvm_vcpu_init(&svm->vcpu, kvm, id); 1145 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1052 if (err) 1146 if (err)
1053 goto free_svm; 1147 goto free_svm;
@@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1141 1235
1142 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 1236 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1143 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 1237 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1238
1239 if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
1240 svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
1241 __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
1242 wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1243 }
1144} 1244}
1145 1245
1146static void svm_vcpu_put(struct kvm_vcpu *vcpu) 1246static void svm_vcpu_put(struct kvm_vcpu *vcpu)
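svm_vcpu_load above keeps a per-cpu shadow of MSR_AMD64_TSC_RATIO and performs the wrmsrl only when the incoming vcpu's ratio differs from what is already loaded on this cpu, so vcpus at the default ratio do not pay for an MSR write on every load. A toy sketch of that shadow-and-skip pattern, with illustrative names:

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 4
#define TSC_RATIO_DEFAULT 0x0100000000ull	/* 1.0 in fixed point */

static uint64_t current_ratio[NR_CPUS];	/* per-cpu shadow of the MSR */
static int msr_writes;			/* count the real wrmsr calls */

static void wrmsr_tsc_ratio(uint64_t val)
{
	(void)val;	/* stand-in for wrmsrl(MSR_AMD64_TSC_RATIO, val) */
	msr_writes++;
}

static void vcpu_load(int cpu, uint64_t vcpu_ratio)
{
	if (vcpu_ratio != current_ratio[cpu]) {
		current_ratio[cpu] = vcpu_ratio;
		wrmsr_tsc_ratio(vcpu_ratio);
	}
}

int main(void)
{
	vcpu_load(0, TSC_RATIO_DEFAULT);	/* first load: writes */
	vcpu_load(0, TSC_RATIO_DEFAULT);	/* same ratio: skipped */
	vcpu_load(0, TSC_RATIO_DEFAULT / 2);	/* changed: writes */
	printf("msr writes: %d\n", msr_writes);	/* 2 */
	return 0;
}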
@@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1365{ 1465{
1366 struct vcpu_svm *svm = to_svm(vcpu); 1466 struct vcpu_svm *svm = to_svm(vcpu);
1367 1467
1368 if (is_guest_mode(vcpu)) {
1369 /*
1370 * We are here because we run in nested mode, the host kvm
1371 * intercepts cr0 writes but the l1 hypervisor does not.
1372 * But the L1 hypervisor may intercept selective cr0 writes.
1373 * This needs to be checked here.
1374 */
1375 unsigned long old, new;
1376
1377 /* Remove bits that would trigger a real cr0 write intercept */
1378 old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
1379 new = cr0 & SVM_CR0_SELECTIVE_MASK;
1380
1381 if (old == new) {
1382 /* cr0 write with ts and mp unchanged */
1383 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
1384 if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
1385 svm->nested.vmexit_rip = kvm_rip_read(vcpu);
1386 svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
1387 svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
1388 return;
1389 }
1390 }
1391 }
1392
1393#ifdef CONFIG_X86_64 1468#ifdef CONFIG_X86_64
1394 if (vcpu->arch.efer & EFER_LME) { 1469 if (vcpu->arch.efer & EFER_LME) {
1395 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1470 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
@@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
2127 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); 2202 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
2128 nested_vmcb->save.cr2 = vmcb->save.cr2; 2203 nested_vmcb->save.cr2 = vmcb->save.cr2;
2129 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; 2204 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
2130 nested_vmcb->save.rflags = vmcb->save.rflags; 2205 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2131 nested_vmcb->save.rip = vmcb->save.rip; 2206 nested_vmcb->save.rip = vmcb->save.rip;
2132 nested_vmcb->save.rsp = vmcb->save.rsp; 2207 nested_vmcb->save.rsp = vmcb->save.rsp;
2133 nested_vmcb->save.rax = vmcb->save.rax; 2208 nested_vmcb->save.rax = vmcb->save.rax;
@@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
2184 svm->vmcb->save.ds = hsave->save.ds; 2259 svm->vmcb->save.ds = hsave->save.ds;
2185 svm->vmcb->save.gdtr = hsave->save.gdtr; 2260 svm->vmcb->save.gdtr = hsave->save.gdtr;
2186 svm->vmcb->save.idtr = hsave->save.idtr; 2261 svm->vmcb->save.idtr = hsave->save.idtr;
2187 svm->vmcb->save.rflags = hsave->save.rflags; 2262 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2188 svm_set_efer(&svm->vcpu, hsave->save.efer); 2263 svm_set_efer(&svm->vcpu, hsave->save.efer);
2189 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); 2264 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2190 svm_set_cr4(&svm->vcpu, hsave->save.cr4); 2265 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
@@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
2312 hsave->save.efer = svm->vcpu.arch.efer; 2387 hsave->save.efer = svm->vcpu.arch.efer;
2313 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); 2388 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
2314 hsave->save.cr4 = svm->vcpu.arch.cr4; 2389 hsave->save.cr4 = svm->vcpu.arch.cr4;
2315 hsave->save.rflags = vmcb->save.rflags; 2390 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2316 hsave->save.rip = kvm_rip_read(&svm->vcpu); 2391 hsave->save.rip = kvm_rip_read(&svm->vcpu);
2317 hsave->save.rsp = vmcb->save.rsp; 2392 hsave->save.rsp = vmcb->save.rsp;
2318 hsave->save.rax = vmcb->save.rax; 2393 hsave->save.rax = vmcb->save.rax;
@@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
2323 2398
2324 copy_vmcb_control_area(hsave, vmcb); 2399 copy_vmcb_control_area(hsave, vmcb);
2325 2400
2326 if (svm->vmcb->save.rflags & X86_EFLAGS_IF) 2401 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2327 svm->vcpu.arch.hflags |= HF_HIF_MASK; 2402 svm->vcpu.arch.hflags |= HF_HIF_MASK;
2328 else 2403 else
2329 svm->vcpu.arch.hflags &= ~HF_HIF_MASK; 2404 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
@@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
2341 svm->vmcb->save.ds = nested_vmcb->save.ds; 2416 svm->vmcb->save.ds = nested_vmcb->save.ds;
2342 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; 2417 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2343 svm->vmcb->save.idtr = nested_vmcb->save.idtr; 2418 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2344 svm->vmcb->save.rflags = nested_vmcb->save.rflags; 2419 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2345 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); 2420 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2346 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); 2421 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2347 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); 2422 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
@@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm)
2443 if (nested_svm_check_permissions(svm)) 2518 if (nested_svm_check_permissions(svm))
2444 return 1; 2519 return 1;
2445 2520
2446 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2447 skip_emulated_instruction(&svm->vcpu);
2448
2449 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2521 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2450 if (!nested_vmcb) 2522 if (!nested_vmcb)
2451 return 1; 2523 return 1;
2452 2524
2525 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2526 skip_emulated_instruction(&svm->vcpu);
2527
2453 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2528 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2454 nested_svm_unmap(page); 2529 nested_svm_unmap(page);
2455 2530
@@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm)
2464 if (nested_svm_check_permissions(svm)) 2539 if (nested_svm_check_permissions(svm))
2465 return 1; 2540 return 1;
2466 2541
2467 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2468 skip_emulated_instruction(&svm->vcpu);
2469
2470 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2542 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2471 if (!nested_vmcb) 2543 if (!nested_vmcb)
2472 return 1; 2544 return 1;
2473 2545
2546 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2547 skip_emulated_instruction(&svm->vcpu);
2548
2474 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2549 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2475 nested_svm_unmap(page); 2550 nested_svm_unmap(page);
2476 2551
@@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm)
2676 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2751 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2677} 2752}
2678 2753
2754bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
2755{
2756 unsigned long cr0 = svm->vcpu.arch.cr0;
2757 bool ret = false;
2758 u64 intercept;
2759
2760 intercept = svm->nested.intercept;
2761
2762 if (!is_guest_mode(&svm->vcpu) ||
2763 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2764 return false;
2765
2766 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2767 val &= ~SVM_CR0_SELECTIVE_MASK;
2768
2769 if (cr0 ^ val) {
2770 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2771 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2772 }
2773
2774 return ret;
2775}
2776
2679#define CR_VALID (1ULL << 63) 2777#define CR_VALID (1ULL << 63)
2680 2778
2681static int cr_interception(struct vcpu_svm *svm) 2779static int cr_interception(struct vcpu_svm *svm)
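check_selective_cr0_intercepted above implements the SVM selective CR0 write intercept on the emulation path: changes to CR0.TS and CR0.MP are masked out of both the current and the new value, and only if some other bit differs does the write become an SVM_EXIT_CR0_SEL_WRITE for the nested hypervisor. A stand-alone sketch of that predicate; the kernel version additionally consults nested_svm_exit_handled, which this toy folds into a single flag:

#include <stdio.h>
#include <stdbool.h>

#define X86_CR0_MP (1ul << 1)
#define X86_CR0_TS (1ul << 3)
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)

/* true if this cr0 write must become a #VMEXIT for the L1 hypervisor */
static bool selective_cr0_intercepted(unsigned long cr0, unsigned long val,
				      bool guest_mode, bool l1_intercepts)
{
	if (!guest_mode || !l1_intercepts)
		return false;

	/* the selective intercept ignores changes to TS and MP */
	cr0 &= ~SVM_CR0_SELECTIVE_MASK;
	val &= ~SVM_CR0_SELECTIVE_MASK;

	return (cr0 ^ val) != 0;
}

int main(void)
{
	unsigned long cr0 = 0x80050033ul;

	/* flipping only TS: not a selective intercept */
	printf("%d\n", selective_cr0_intercepted(cr0, cr0 ^ X86_CR0_TS,
						 true, true));
	/* flipping PE as well: intercepted */
	printf("%d\n", selective_cr0_intercepted(cr0, cr0 ^ 1ul,
						 true, true));
	return 0;
}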
@@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm)
2699 val = kvm_register_read(&svm->vcpu, reg); 2797 val = kvm_register_read(&svm->vcpu, reg);
2700 switch (cr) { 2798 switch (cr) {
2701 case 0: 2799 case 0:
2702 err = kvm_set_cr0(&svm->vcpu, val); 2800 if (!check_selective_cr0_intercepted(svm, val))
2801 err = kvm_set_cr0(&svm->vcpu, val);
2802 else
2803 return 1;
2804
2703 break; 2805 break;
2704 case 3: 2806 case 3:
2705 err = kvm_set_cr3(&svm->vcpu, val); 2807 err = kvm_set_cr3(&svm->vcpu, val);
@@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm)
2744 return 1; 2846 return 1;
2745} 2847}
2746 2848
2747static int cr0_write_interception(struct vcpu_svm *svm)
2748{
2749 struct kvm_vcpu *vcpu = &svm->vcpu;
2750 int r;
2751
2752 r = cr_interception(svm);
2753
2754 if (svm->nested.vmexit_rip) {
2755 kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
2756 kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
2757 kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
2758 svm->nested.vmexit_rip = 0;
2759 }
2760
2761 return r;
2762}
2763
2764static int dr_interception(struct vcpu_svm *svm) 2849static int dr_interception(struct vcpu_svm *svm)
2765{ 2850{
2766 int reg, dr; 2851 int reg, dr;
@@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2813 case MSR_IA32_TSC: { 2898 case MSR_IA32_TSC: {
2814 struct vmcb *vmcb = get_host_vmcb(svm); 2899 struct vmcb *vmcb = get_host_vmcb(svm);
2815 2900
2816 *data = vmcb->control.tsc_offset + native_read_tsc(); 2901 *data = vmcb->control.tsc_offset +
2902 svm_scale_tsc(vcpu, native_read_tsc());
2903
2817 break; 2904 break;
2818 } 2905 }
2819 case MSR_STAR: 2906 case MSR_STAR:
@@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
3048 [SVM_EXIT_READ_CR4] = cr_interception, 3135 [SVM_EXIT_READ_CR4] = cr_interception,
3049 [SVM_EXIT_READ_CR8] = cr_interception, 3136 [SVM_EXIT_READ_CR8] = cr_interception,
3050 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 3137 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
3051 [SVM_EXIT_WRITE_CR0] = cr0_write_interception, 3138 [SVM_EXIT_WRITE_CR0] = cr_interception,
3052 [SVM_EXIT_WRITE_CR3] = cr_interception, 3139 [SVM_EXIT_WRITE_CR3] = cr_interception,
3053 [SVM_EXIT_WRITE_CR4] = cr_interception, 3140 [SVM_EXIT_WRITE_CR4] = cr_interception,
3054 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 3141 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
@@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
3104 [SVM_EXIT_NPF] = pf_interception, 3191 [SVM_EXIT_NPF] = pf_interception,
3105}; 3192};
3106 3193
3107void dump_vmcb(struct kvm_vcpu *vcpu) 3194static void dump_vmcb(struct kvm_vcpu *vcpu)
3108{ 3195{
3109 struct vcpu_svm *svm = to_svm(vcpu); 3196 struct vcpu_svm *svm = to_svm(vcpu);
3110 struct vmcb_control_area *control = &svm->vmcb->control; 3197 struct vmcb_control_area *control = &svm->vmcb->control;
3111 struct vmcb_save_area *save = &svm->vmcb->save; 3198 struct vmcb_save_area *save = &svm->vmcb->save;
3112 3199
3113 pr_err("VMCB Control Area:\n"); 3200 pr_err("VMCB Control Area:\n");
3114 pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); 3201 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3115 pr_err("cr_write: %04x\n", control->intercept_cr >> 16); 3202 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3116 pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); 3203 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3117 pr_err("dr_write: %04x\n", control->intercept_dr >> 16); 3204 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3118 pr_err("exceptions: %08x\n", control->intercept_exceptions); 3205 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3119 pr_err("intercepts: %016llx\n", control->intercept); 3206 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3120 pr_err("pause filter count: %d\n", control->pause_filter_count); 3207 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3121 pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); 3208 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3122 pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); 3209 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3123 pr_err("tsc_offset: %016llx\n", control->tsc_offset); 3210 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3124 pr_err("asid: %d\n", control->asid); 3211 pr_err("%-20s%d\n", "asid:", control->asid);
3125 pr_err("tlb_ctl: %d\n", control->tlb_ctl); 3212 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3126 pr_err("int_ctl: %08x\n", control->int_ctl); 3213 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3127 pr_err("int_vector: %08x\n", control->int_vector); 3214 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3128 pr_err("int_state: %08x\n", control->int_state); 3215 pr_err("%-20s%08x\n", "int_state:", control->int_state);
3129 pr_err("exit_code: %08x\n", control->exit_code); 3216 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3130 pr_err("exit_info1: %016llx\n", control->exit_info_1); 3217 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3131 pr_err("exit_info2: %016llx\n", control->exit_info_2); 3218 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3132 pr_err("exit_int_info: %08x\n", control->exit_int_info); 3219 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3133 pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); 3220 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3134 pr_err("nested_ctl: %lld\n", control->nested_ctl); 3221 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3135 pr_err("nested_cr3: %016llx\n", control->nested_cr3); 3222 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3136 pr_err("event_inj: %08x\n", control->event_inj); 3223 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3137 pr_err("event_inj_err: %08x\n", control->event_inj_err); 3224 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3138 pr_err("lbr_ctl: %lld\n", control->lbr_ctl); 3225 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3139 pr_err("next_rip: %016llx\n", control->next_rip); 3226 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3140 pr_err("VMCB State Save Area:\n"); 3227 pr_err("VMCB State Save Area:\n");
3141 pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", 3228 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3142 save->es.selector, save->es.attrib, 3229 "es:",
3143 save->es.limit, save->es.base); 3230 save->es.selector, save->es.attrib,
3144 pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", 3231 save->es.limit, save->es.base);
3145 save->cs.selector, save->cs.attrib, 3232 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3146 save->cs.limit, save->cs.base); 3233 "cs:",
3147 pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", 3234 save->cs.selector, save->cs.attrib,
3148 save->ss.selector, save->ss.attrib, 3235 save->cs.limit, save->cs.base);
3149 save->ss.limit, save->ss.base); 3236 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3150 pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", 3237 "ss:",
3151 save->ds.selector, save->ds.attrib, 3238 save->ss.selector, save->ss.attrib,
3152 save->ds.limit, save->ds.base); 3239 save->ss.limit, save->ss.base);
3153 pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", 3240 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3154 save->fs.selector, save->fs.attrib, 3241 "ds:",
3155 save->fs.limit, save->fs.base); 3242 save->ds.selector, save->ds.attrib,
3156 pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", 3243 save->ds.limit, save->ds.base);
3157 save->gs.selector, save->gs.attrib, 3244 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3158 save->gs.limit, save->gs.base); 3245 "fs:",
3159 pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", 3246 save->fs.selector, save->fs.attrib,
3160 save->gdtr.selector, save->gdtr.attrib, 3247 save->fs.limit, save->fs.base);
3161 save->gdtr.limit, save->gdtr.base); 3248 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3162 pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", 3249 "gs:",
3163 save->ldtr.selector, save->ldtr.attrib, 3250 save->gs.selector, save->gs.attrib,
3164 save->ldtr.limit, save->ldtr.base); 3251 save->gs.limit, save->gs.base);
3165 pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", 3252 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3166 save->idtr.selector, save->idtr.attrib, 3253 "gdtr:",
3167 save->idtr.limit, save->idtr.base); 3254 save->gdtr.selector, save->gdtr.attrib,
3168 pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", 3255 save->gdtr.limit, save->gdtr.base);
3169 save->tr.selector, save->tr.attrib, 3256 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3170 save->tr.limit, save->tr.base); 3257 "ldtr:",
3258 save->ldtr.selector, save->ldtr.attrib,
3259 save->ldtr.limit, save->ldtr.base);
3260 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3261 "idtr:",
3262 save->idtr.selector, save->idtr.attrib,
3263 save->idtr.limit, save->idtr.base);
3264 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3265 "tr:",
3266 save->tr.selector, save->tr.attrib,
3267 save->tr.limit, save->tr.base);
3171 pr_err("cpl: %d efer: %016llx\n", 3268 pr_err("cpl: %d efer: %016llx\n",
3172 save->cpl, save->efer); 3269 save->cpl, save->efer);
3173 pr_err("cr0: %016llx cr2: %016llx\n", 3270 pr_err("%-15s %016llx %-13s %016llx\n",
3174 save->cr0, save->cr2); 3271 "cr0:", save->cr0, "cr2:", save->cr2);
3175 pr_err("cr3: %016llx cr4: %016llx\n", 3272 pr_err("%-15s %016llx %-13s %016llx\n",
3176 save->cr3, save->cr4); 3273 "cr3:", save->cr3, "cr4:", save->cr4);
3177 pr_err("dr6: %016llx dr7: %016llx\n", 3274 pr_err("%-15s %016llx %-13s %016llx\n",
3178 save->dr6, save->dr7); 3275 "dr6:", save->dr6, "dr7:", save->dr7);
3179 pr_err("rip: %016llx rflags: %016llx\n", 3276 pr_err("%-15s %016llx %-13s %016llx\n",
3180 save->rip, save->rflags); 3277 "rip:", save->rip, "rflags:", save->rflags);
3181 pr_err("rsp: %016llx rax: %016llx\n", 3278 pr_err("%-15s %016llx %-13s %016llx\n",
3182 save->rsp, save->rax); 3279 "rsp:", save->rsp, "rax:", save->rax);
3183 pr_err("star: %016llx lstar: %016llx\n", 3280 pr_err("%-15s %016llx %-13s %016llx\n",
3184 save->star, save->lstar); 3281 "star:", save->star, "lstar:", save->lstar);
3185 pr_err("cstar: %016llx sfmask: %016llx\n", 3282 pr_err("%-15s %016llx %-13s %016llx\n",
3186 save->cstar, save->sfmask); 3283 "cstar:", save->cstar, "sfmask:", save->sfmask);
3187 pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", 3284 pr_err("%-15s %016llx %-13s %016llx\n",
3188 save->kernel_gs_base, save->sysenter_cs); 3285 "kernel_gs_base:", save->kernel_gs_base,
3189 pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", 3286 "sysenter_cs:", save->sysenter_cs);
3190 save->sysenter_esp, save->sysenter_eip); 3287 pr_err("%-15s %016llx %-13s %016llx\n",
3191 pr_err("gpat: %016llx dbgctl: %016llx\n", 3288 "sysenter_esp:", save->sysenter_esp,
3192 save->g_pat, save->dbgctl); 3289 "sysenter_eip:", save->sysenter_eip);
3193 pr_err("br_from: %016llx br_to: %016llx\n", 3290 pr_err("%-15s %016llx %-13s %016llx\n",
3194 save->br_from, save->br_to); 3291 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3195 pr_err("excp_from: %016llx excp_to: %016llx\n", 3292 pr_err("%-15s %016llx %-13s %016llx\n",
3196 save->last_excp_from, save->last_excp_to); 3293 "br_from:", save->br_from, "br_to:", save->br_to);
3197 3294 pr_err("%-15s %016llx %-13s %016llx\n",
3295 "excp_from:", save->last_excp_from,
3296 "excp_to:", save->last_excp_to);
3198} 3297}
3199 3298
3200static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) 3299static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
@@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3384 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) 3483 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3385 return 0; 3484 return 0;
3386 3485
3387 ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); 3486 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3388 3487
3389 if (is_guest_mode(vcpu)) 3488 if (is_guest_mode(vcpu))
3390 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); 3489 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
@@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
3871 update_cr0_intercept(svm); 3970 update_cr0_intercept(svm);
3872} 3971}
3873 3972
3973#define PRE_EX(exit) { .exit_code = (exit), \
3974 .stage = X86_ICPT_PRE_EXCEPT, }
3975#define POST_EX(exit) { .exit_code = (exit), \
3976 .stage = X86_ICPT_POST_EXCEPT, }
3977#define POST_MEM(exit) { .exit_code = (exit), \
3978 .stage = X86_ICPT_POST_MEMACCESS, }
3979
3980static struct __x86_intercept {
3981 u32 exit_code;
3982 enum x86_intercept_stage stage;
3983} x86_intercept_map[] = {
3984 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
3985 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
3986 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
3987 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
3988 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
3989 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
3990 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
3991 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
3992 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
3993 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
3994 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
3995 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
3996 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
3997 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
3998 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
3999 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
4000 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
4001 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
4002 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
4003 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
4004 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
4005 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
4006 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
4007 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
4008 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
4009 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
4010 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
4011 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
4012 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
4013 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
4014 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
4015 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
4016 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
4017 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
4018 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
4019 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
4020 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
4021 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
4022 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
4023 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
4024 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
4025 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
4026 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
4027 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
4028 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
4029 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
4030};
4031
4032#undef PRE_EX
4033#undef POST_EX
4034#undef POST_MEM
4035
4036static int svm_check_intercept(struct kvm_vcpu *vcpu,
4037 struct x86_instruction_info *info,
4038 enum x86_intercept_stage stage)
4039{
4040 struct vcpu_svm *svm = to_svm(vcpu);
4041 int vmexit, ret = X86EMUL_CONTINUE;
4042 struct __x86_intercept icpt_info;
4043 struct vmcb *vmcb = svm->vmcb;
4044
4045 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4046 goto out;
4047
4048 icpt_info = x86_intercept_map[info->intercept];
4049
4050 if (stage != icpt_info.stage)
4051 goto out;
4052
4053 switch (icpt_info.exit_code) {
4054 case SVM_EXIT_READ_CR0:
4055 if (info->intercept == x86_intercept_cr_read)
4056 icpt_info.exit_code += info->modrm_reg;
4057 break;
4058 case SVM_EXIT_WRITE_CR0: {
4059 unsigned long cr0, val;
4060 u64 intercept;
4061
4062 if (info->intercept == x86_intercept_cr_write)
4063 icpt_info.exit_code += info->modrm_reg;
4064
4065 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
4066 break;
4067
4068 intercept = svm->nested.intercept;
4069
4070 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
4071 break;
4072
4073 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4074 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
4075
4076 if (info->intercept == x86_intercept_lmsw) {
4077 cr0 &= 0xfUL;
4078 val &= 0xfUL;
4079 /* lmsw can't clear PE - catch this here */
4080 if (cr0 & X86_CR0_PE)
4081 val |= X86_CR0_PE;
4082 }
4083
4084 if (cr0 ^ val)
4085 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4086
4087 break;
4088 }
4089 case SVM_EXIT_READ_DR0:
4090 case SVM_EXIT_WRITE_DR0:
4091 icpt_info.exit_code += info->modrm_reg;
4092 break;
4093 case SVM_EXIT_MSR:
4094 if (info->intercept == x86_intercept_wrmsr)
4095 vmcb->control.exit_info_1 = 1;
4096 else
4097 vmcb->control.exit_info_1 = 0;
4098 break;
4099 case SVM_EXIT_PAUSE:
4100 /*
4101 * We only get this intercept for NOP, but PAUSE
4102 * is really REP NOP, so check for the REP prefix here
4103 */
4104 if (info->rep_prefix != REPE_PREFIX)
4105 goto out;
4106 case SVM_EXIT_IOIO: {
4107 u64 exit_info;
4108 u32 bytes;
4109
4110 exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
4111
4112 if (info->intercept == x86_intercept_in ||
4113 info->intercept == x86_intercept_ins) {
4114 exit_info |= SVM_IOIO_TYPE_MASK;
4115 bytes = info->src_bytes;
4116 } else {
4117 bytes = info->dst_bytes;
4118 }
4119
4120 if (info->intercept == x86_intercept_outs ||
4121 info->intercept == x86_intercept_ins)
4122 exit_info |= SVM_IOIO_STR_MASK;
4123
4124 if (info->rep_prefix)
4125 exit_info |= SVM_IOIO_REP_MASK;
4126
4127 bytes = min(bytes, 4u);
4128
4129 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4130
4131 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4132
4133 vmcb->control.exit_info_1 = exit_info;
4134 vmcb->control.exit_info_2 = info->next_rip;
4135
4136 break;
4137 }
4138 default:
4139 break;
4140 }
4141
4142 vmcb->control.next_rip = info->next_rip;
4143 vmcb->control.exit_code = icpt_info.exit_code;
4144 vmexit = nested_svm_exit_handled(svm);
4145
4146 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4147 : X86EMUL_CONTINUE;
4148
4149out:
4150 return ret;
4151}
4152
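The IOIO case above packs everything a nested hypervisor needs to filter the access into exit_info_1. A worked encoding, derived from the masks and shifts used above (a sketch for illustration, not code from the patch):

/*
 * Worked example: "rep outsw" with dx == 0x71 under 16-bit addressing.
 *
 *   exit_info_1 = (0x71 << 16)                       port taken from RDX
 *               | SVM_IOIO_STR_MASK                  string instruction
 *               | SVM_IOIO_REP_MASK                  rep prefix
 *               | (2 << SVM_IOIO_SIZE_SHIFT)         word-sized access
 *               | (2 << (SVM_IOIO_ASIZE_SHIFT - 1))  16-bit address size
 *               = 0x007100ac
 *
 * SVM_IOIO_TYPE_MASK stays clear because OUT is a write; IN and INS
 * would set it.
 */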
3874static struct kvm_x86_ops svm_x86_ops = { 4153static struct kvm_x86_ops svm_x86_ops = {
3875 .cpu_has_kvm_support = has_svm, 4154 .cpu_has_kvm_support = has_svm,
3876 .disabled_by_bios = is_disabled, 4155 .disabled_by_bios = is_disabled,
@@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = {
3952 4231
3953 .has_wbinvd_exit = svm_has_wbinvd_exit, 4232 .has_wbinvd_exit = svm_has_wbinvd_exit,
3954 4233
4234 .set_tsc_khz = svm_set_tsc_khz,
3955 .write_tsc_offset = svm_write_tsc_offset, 4235 .write_tsc_offset = svm_write_tsc_offset,
3956 .adjust_tsc_offset = svm_adjust_tsc_offset, 4236 .adjust_tsc_offset = svm_adjust_tsc_offset,
4237 .compute_tsc_offset = svm_compute_tsc_offset,
3957 4238
3958 .set_tdp_cr3 = set_tdp_cr3, 4239 .set_tdp_cr3 = set_tdp_cr3,
4240
4241 .check_intercept = svm_check_intercept,
3959}; 4242};
3960 4243
3961static int __init svm_init(void) 4244static int __init svm_init(void)
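The new .check_intercept hook is meant to be called from the instruction emulator at the stage recorded for each instruction in x86_intercept_map. A minimal sketch of the expected calling pattern (the wrapper itself is illustrative and not part of this patch):

/* Illustrative wrapper: probe the intercept map at each emulation
 * stage before committing an instruction's side effects. */
static int emulate_with_intercept_checks(struct kvm_vcpu *vcpu,
					 struct x86_instruction_info *info)
{
	int rc;

	/* before the instruction's exception checks */
	rc = kvm_x86_ops->check_intercept(vcpu, info, X86_ICPT_PRE_EXCEPT);
	if (rc == X86EMUL_INTERCEPTED)
		return rc;	/* L1 asked for a #VMEXIT instead */

	/* after exception checks, before any memory access */
	rc = kvm_x86_ops->check_intercept(vcpu, info, X86_ICPT_POST_EXCEPT);
	if (rc == X86EMUL_INTERCEPTED)
		return rc;

	/* after the memory access, before the instruction retires */
	return kvm_x86_ops->check_intercept(vcpu, info, X86_ICPT_POST_MEMACCESS);
}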
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5b4cdcbd154c..4c3fa0f67469 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -128,8 +128,11 @@ struct vcpu_vmx {
128 unsigned long host_rsp; 128 unsigned long host_rsp;
129 int launched; 129 int launched;
130 u8 fail; 130 u8 fail;
131 u8 cpl;
132 bool nmi_known_unmasked;
131 u32 exit_intr_info; 133 u32 exit_intr_info;
132 u32 idt_vectoring_info; 134 u32 idt_vectoring_info;
135 ulong rflags;
133 struct shared_msr_entry *guest_msrs; 136 struct shared_msr_entry *guest_msrs;
134 int nmsrs; 137 int nmsrs;
135 int save_nmsrs; 138 int save_nmsrs;
@@ -159,6 +162,10 @@ struct vcpu_vmx {
159 u32 ar; 162 u32 ar;
160 } tr, es, ds, fs, gs; 163 } tr, es, ds, fs, gs;
161 } rmode; 164 } rmode;
165 struct {
166 u32 bitmask; /* 4 bits per segment (1 bit per field) */
167 struct kvm_save_segment seg[8];
168 } segment_cache;
162 int vpid; 169 int vpid;
163 bool emulation_required; 170 bool emulation_required;
164 171
@@ -171,6 +178,15 @@ struct vcpu_vmx {
171 bool rdtscp_enabled; 178 bool rdtscp_enabled;
172}; 179};
173 180
181enum segment_cache_field {
182 SEG_FIELD_SEL = 0,
183 SEG_FIELD_BASE = 1,
184 SEG_FIELD_LIMIT = 2,
185 SEG_FIELD_AR = 3,
186
187 SEG_FIELD_NR = 4
188};
189
174static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 190static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
175{ 191{
176 return container_of(vcpu, struct vcpu_vmx, vcpu); 192 return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
643 vmcs_writel(field, vmcs_readl(field) | mask); 659 vmcs_writel(field, vmcs_readl(field) | mask);
644} 660}
645 661
662static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
663{
664 vmx->segment_cache.bitmask = 0;
665}
666
667static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
668 unsigned field)
669{
670 bool ret;
671 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
672
673 if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
674 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
675 vmx->segment_cache.bitmask = 0;
676 }
677 ret = vmx->segment_cache.bitmask & mask;
678 vmx->segment_cache.bitmask |= mask;
679 return ret;
680}
681
682static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
683{
684 u16 *p = &vmx->segment_cache.seg[seg].selector;
685
686 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
687 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
688 return *p;
689}
690
691static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
692{
693 ulong *p = &vmx->segment_cache.seg[seg].base;
694
695 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
696 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
697 return *p;
698}
699
700static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
701{
702 u32 *p = &vmx->segment_cache.seg[seg].limit;
703
704 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
705 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
706 return *p;
707}
708
709static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
710{
711 u32 *p = &vmx->segment_cache.seg[seg].ar;
712
713 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
714 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
715 return *p;
716}
717
646static void update_exception_bitmap(struct kvm_vcpu *vcpu) 718static void update_exception_bitmap(struct kvm_vcpu *vcpu)
647{ 719{
648 u32 eb; 720 u32 eb;
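The segment cache gives every segment SEG_FIELD_NR consecutive bits of segment_cache.bitmask, one per cached field, and reuses regs_avail for bulk invalidation: once VCPU_EXREG_SEGMENTS is cleared after a vmexit, the first lookup wipes the bitmask and all fields are re-read from the VMCS on demand. A sketch of the key layout (VCPU_SREG_CS == 1 comes from the arch-wide segment enum, which is not part of this hunk):

/* Sketch: the (segment, field) -> bit mapping used by
 * vmx_segment_cache_test_set(). */
static inline u32 seg_cache_bit(unsigned seg, unsigned field)
{
	return 1u << (seg * SEG_FIELD_NR + field);
}

/* e.g. the cached CS base sits behind bit
 * VCPU_SREG_CS * SEG_FIELD_NR + SEG_FIELD_BASE == 1 * 4 + 1 == 5 */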
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
970{ 1042{
971 unsigned long rflags, save_rflags; 1043 unsigned long rflags, save_rflags;
972 1044
973 rflags = vmcs_readl(GUEST_RFLAGS); 1045 if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
974 if (to_vmx(vcpu)->rmode.vm86_active) { 1046 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
975 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 1047 rflags = vmcs_readl(GUEST_RFLAGS);
976 save_rflags = to_vmx(vcpu)->rmode.save_rflags; 1048 if (to_vmx(vcpu)->rmode.vm86_active) {
977 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 1049 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1050 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
1051 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1052 }
1053 to_vmx(vcpu)->rflags = rflags;
978 } 1054 }
979 return rflags; 1055 return to_vmx(vcpu)->rflags;
980} 1056}
981 1057
982static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1058static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
983{ 1059{
1060 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1061 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
1062 to_vmx(vcpu)->rflags = rflags;
984 if (to_vmx(vcpu)->rmode.vm86_active) { 1063 if (to_vmx(vcpu)->rmode.vm86_active) {
985 to_vmx(vcpu)->rmode.save_rflags = rflags; 1064 to_vmx(vcpu)->rmode.save_rflags = rflags;
986 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 1065 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
1053 } 1132 }
1054 1133
1055 if (vmx->rmode.vm86_active) { 1134 if (vmx->rmode.vm86_active) {
1056 if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) 1135 int inc_eip = 0;
1136 if (kvm_exception_is_soft(nr))
1137 inc_eip = vcpu->arch.event_exit_inst_len;
1138 if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
1057 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 1139 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
1058 return; 1140 return;
1059 } 1141 }
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void)
1151} 1233}
1152 1234
1153/* 1235/*
1236 * Empty callback: VMX does not implement TSC scaling yet. Once the
1237 * SET_TSC_KHZ ioctl is enabled for VMX, this callback must update the
1238 * internal VMX state to make the change effective.
1239 */
1240static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1241{
1242 /* Nothing to do here */
1243}
1244
1245/*
1154 * writes 'offset' into guest's timestamp counter offset register 1246 * writes 'offset' into guest's timestamp counter offset register
1155 */ 1247 */
1156static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 1248static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
1164 vmcs_write64(TSC_OFFSET, offset + adjustment); 1256 vmcs_write64(TSC_OFFSET, offset + adjustment);
1165} 1257}
1166 1258
1259static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1260{
1261 return target_tsc - native_read_tsc();
1262}
1263
1167/* 1264/*
1168 * Reads an msr value (of 'msr_index') into 'pdata'. 1265 * Reads an msr value (of 'msr_index') into 'pdata'.
1169 * Returns 0 on success, non-0 otherwise. 1266 * Returns 0 on success, non-0 otherwise.
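vmx_compute_tsc_offset() is the algebraic inverse of what the hardware does: the guest observes guest_tsc = host_tsc + TSC_OFFSET, so an offset of target_tsc - host_tsc makes the guest's next read land at roughly target_tsc. A sketch of how the two hooks pair up (kvm_write_tsc() in x86.c does this with locking and offset matching on top):

static void set_guest_tsc(struct kvm_vcpu *vcpu, u64 data)
{
	/* the guest reads host_tsc + offset, so this lands at ~data */
	u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);

	kvm_x86_ops->write_tsc_offset(vcpu, offset);
}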
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1243 break; 1340 break;
1244#ifdef CONFIG_X86_64 1341#ifdef CONFIG_X86_64
1245 case MSR_FS_BASE: 1342 case MSR_FS_BASE:
1343 vmx_segment_cache_clear(vmx);
1246 vmcs_writel(GUEST_FS_BASE, data); 1344 vmcs_writel(GUEST_FS_BASE, data);
1247 break; 1345 break;
1248 case MSR_GS_BASE: 1346 case MSR_GS_BASE:
1347 vmx_segment_cache_clear(vmx);
1249 vmcs_writel(GUEST_GS_BASE, data); 1348 vmcs_writel(GUEST_GS_BASE, data);
1250 break; 1349 break;
1251 case MSR_KERNEL_GS_BASE: 1350 case MSR_KERNEL_GS_BASE:
@@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1689 vmx->emulation_required = 1; 1788 vmx->emulation_required = 1;
1690 vmx->rmode.vm86_active = 0; 1789 vmx->rmode.vm86_active = 0;
1691 1790
1791 vmx_segment_cache_clear(vmx);
1792
1692 vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); 1793 vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
1693 vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); 1794 vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
1694 vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); 1795 vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
@@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1712 fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); 1813 fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
1713 fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); 1814 fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
1714 1815
1816 vmx_segment_cache_clear(vmx);
1817
1715 vmcs_write16(GUEST_SS_SELECTOR, 0); 1818 vmcs_write16(GUEST_SS_SELECTOR, 0);
1716 vmcs_write32(GUEST_SS_AR_BYTES, 0x93); 1819 vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
1717 1820
@@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1775 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 1878 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1776 } 1879 }
1777 1880
1881 vmx_segment_cache_clear(vmx);
1882
1778 vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); 1883 vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
1779 vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); 1884 vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
1780 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); 1885 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1851{ 1956{
1852 u32 guest_tr_ar; 1957 u32 guest_tr_ar;
1853 1958
1959 vmx_segment_cache_clear(to_vmx(vcpu));
1960
1854 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 1961 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
1855 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { 1962 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
1856 printk(KERN_DEBUG "%s: tss fixup for long mode.\n", 1963 printk(KERN_DEBUG "%s: tss fixup for long mode.\n",
@@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1998 vmcs_writel(CR0_READ_SHADOW, cr0); 2105 vmcs_writel(CR0_READ_SHADOW, cr0);
1999 vmcs_writel(GUEST_CR0, hw_cr0); 2106 vmcs_writel(GUEST_CR0, hw_cr0);
2000 vcpu->arch.cr0 = cr0; 2107 vcpu->arch.cr0 = cr0;
2108 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
2001} 2109}
2002 2110
2003static u64 construct_eptp(unsigned long root_hpa) 2111static u64 construct_eptp(unsigned long root_hpa)
@@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
2053 struct kvm_segment *var, int seg) 2161 struct kvm_segment *var, int seg)
2054{ 2162{
2055 struct vcpu_vmx *vmx = to_vmx(vcpu); 2163 struct vcpu_vmx *vmx = to_vmx(vcpu);
2056 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2057 struct kvm_save_segment *save; 2164 struct kvm_save_segment *save;
2058 u32 ar; 2165 u32 ar;
2059 2166
@@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
2075 var->limit = save->limit; 2182 var->limit = save->limit;
2076 ar = save->ar; 2183 ar = save->ar;
2077 if (seg == VCPU_SREG_TR 2184 if (seg == VCPU_SREG_TR
2078 || var->selector == vmcs_read16(sf->selector)) 2185 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
2079 goto use_saved_rmode_seg; 2186 goto use_saved_rmode_seg;
2080 } 2187 }
2081 var->base = vmcs_readl(sf->base); 2188 var->base = vmx_read_guest_seg_base(vmx, seg);
2082 var->limit = vmcs_read32(sf->limit); 2189 var->limit = vmx_read_guest_seg_limit(vmx, seg);
2083 var->selector = vmcs_read16(sf->selector); 2190 var->selector = vmx_read_guest_seg_selector(vmx, seg);
2084 ar = vmcs_read32(sf->ar_bytes); 2191 ar = vmx_read_guest_seg_ar(vmx, seg);
2085use_saved_rmode_seg: 2192use_saved_rmode_seg:
2086 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) 2193 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
2087 ar = 0; 2194 ar = 0;
@@ -2098,27 +2205,37 @@ use_saved_rmode_seg:
2098 2205
2099static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 2206static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2100{ 2207{
2101 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2102 struct kvm_segment s; 2208 struct kvm_segment s;
2103 2209
2104 if (to_vmx(vcpu)->rmode.vm86_active) { 2210 if (to_vmx(vcpu)->rmode.vm86_active) {
2105 vmx_get_segment(vcpu, &s, seg); 2211 vmx_get_segment(vcpu, &s, seg);
2106 return s.base; 2212 return s.base;
2107 } 2213 }
2108 return vmcs_readl(sf->base); 2214 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
2109} 2215}
2110 2216
2111static int vmx_get_cpl(struct kvm_vcpu *vcpu) 2217static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
2112{ 2218{
2113 if (!is_protmode(vcpu)) 2219 if (!is_protmode(vcpu))
2114 return 0; 2220 return 0;
2115 2221
2116 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ 2222 if (!is_long_mode(vcpu)
2223 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
2117 return 3; 2224 return 3;
2118 2225
2119 return vmcs_read16(GUEST_CS_SELECTOR) & 3; 2226 return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
2120} 2227}
2121 2228
2229static int vmx_get_cpl(struct kvm_vcpu *vcpu)
2230{
2231 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
2232 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
2233 to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
2234 }
2235 return to_vmx(vcpu)->cpl;
2236}
2237
2238
2122static u32 vmx_segment_access_rights(struct kvm_segment *var) 2239static u32 vmx_segment_access_rights(struct kvm_segment *var)
2123{ 2240{
2124 u32 ar; 2241 u32 ar;
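The CPL joins rflags and the segments in the regs_avail read-through scheme: vmx_get_cpl() recomputes only on a miss, and every path that can change the effective CPL must drop the cached value, which this patch does in vmx_set_rflags(), vmx_set_cr0() and vmx_set_segment() below. The invalidation each site open-codes is a single bit clear (hypothetical helper, shown only for illustration):

static inline void vmx_invalidate_cpl_cache(struct kvm_vcpu *vcpu)
{
	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}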
@@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
2148 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2265 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2149 u32 ar; 2266 u32 ar;
2150 2267
2268 vmx_segment_cache_clear(vmx);
2269
2151 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { 2270 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
2152 vmcs_write16(sf->selector, var->selector); 2271 vmcs_write16(sf->selector, var->selector);
2153 vmx->rmode.tr.selector = var->selector; 2272 vmx->rmode.tr.selector = var->selector;
@@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
2184 ar |= 0x1; /* Accessed */ 2303 ar |= 0x1; /* Accessed */
2185 2304
2186 vmcs_write32(sf->ar_bytes, ar); 2305 vmcs_write32(sf->ar_bytes, ar);
2306 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
2187} 2307}
2188 2308
2189static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 2309static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
2190{ 2310{
2191 u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); 2311 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
2192 2312
2193 *db = (ar >> 14) & 1; 2313 *db = (ar >> 14) & 1;
2194 *l = (ar >> 13) & 1; 2314 *l = (ar >> 13) & 1;
@@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2775 if (ret != 0) 2895 if (ret != 0)
2776 goto out; 2896 goto out;
2777 2897
2898 vmx_segment_cache_clear(vmx);
2899
2778 seg_setup(VCPU_SREG_CS); 2900 seg_setup(VCPU_SREG_CS);
2779 /* 2901 /*
2780 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode 2902 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
2904 3026
2905 ++vcpu->stat.irq_injections; 3027 ++vcpu->stat.irq_injections;
2906 if (vmx->rmode.vm86_active) { 3028 if (vmx->rmode.vm86_active) {
2907 if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) 3029 int inc_eip = 0;
3030 if (vcpu->arch.interrupt.soft)
3031 inc_eip = vcpu->arch.event_exit_inst_len;
3032 if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
2908 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 3033 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2909 return; 3034 return;
2910 } 3035 }
@@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2937 } 3062 }
2938 3063
2939 ++vcpu->stat.nmi_injections; 3064 ++vcpu->stat.nmi_injections;
3065 vmx->nmi_known_unmasked = false;
2940 if (vmx->rmode.vm86_active) { 3066 if (vmx->rmode.vm86_active) {
2941 if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) 3067 if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
2942 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 3068 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2943 return; 3069 return;
2944 } 3070 }
@@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
2961{ 3087{
2962 if (!cpu_has_virtual_nmis()) 3088 if (!cpu_has_virtual_nmis())
2963 return to_vmx(vcpu)->soft_vnmi_blocked; 3089 return to_vmx(vcpu)->soft_vnmi_blocked;
3090 if (to_vmx(vcpu)->nmi_known_unmasked)
3091 return false;
2964 return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; 3092 return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
2965} 3093}
2966 3094
@@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
2974 vmx->vnmi_blocked_time = 0; 3102 vmx->vnmi_blocked_time = 0;
2975 } 3103 }
2976 } else { 3104 } else {
3105 vmx->nmi_known_unmasked = !masked;
2977 if (masked) 3106 if (masked)
2978 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3107 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
2979 GUEST_INTR_STATE_NMI); 3108 GUEST_INTR_STATE_NMI);
@@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
3091 enum emulation_result er; 3220 enum emulation_result er;
3092 3221
3093 vect_info = vmx->idt_vectoring_info; 3222 vect_info = vmx->idt_vectoring_info;
3094 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3223 intr_info = vmx->exit_intr_info;
3095 3224
3096 if (is_machine_check(intr_info)) 3225 if (is_machine_check(intr_info))
3097 return handle_machine_check(vcpu); 3226 return handle_machine_check(vcpu);
@@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
3122 } 3251 }
3123 3252
3124 error_code = 0; 3253 error_code = 0;
3125 rip = kvm_rip_read(vcpu);
3126 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 3254 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
3127 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 3255 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
3128 if (is_page_fault(intr_info)) { 3256 if (is_page_fault(intr_info)) {
@@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
3169 vmx->vcpu.arch.event_exit_inst_len = 3297 vmx->vcpu.arch.event_exit_inst_len =
3170 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 3298 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
3171 kvm_run->exit_reason = KVM_EXIT_DEBUG; 3299 kvm_run->exit_reason = KVM_EXIT_DEBUG;
3300 rip = kvm_rip_read(vcpu);
3172 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 3301 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
3173 kvm_run->debug.arch.exception = ex_no; 3302 kvm_run->debug.arch.exception = ex_no;
3174 break; 3303 break;
@@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
3505 switch (type) { 3634 switch (type) {
3506 case INTR_TYPE_NMI_INTR: 3635 case INTR_TYPE_NMI_INTR:
3507 vcpu->arch.nmi_injected = false; 3636 vcpu->arch.nmi_injected = false;
3508 if (cpu_has_virtual_nmis()) 3637 vmx_set_nmi_mask(vcpu, true);
3509 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3510 GUEST_INTR_STATE_NMI);
3511 break; 3638 break;
3512 case INTR_TYPE_EXT_INTR: 3639 case INTR_TYPE_EXT_INTR:
3513 case INTR_TYPE_SOFT_INTR: 3640 case INTR_TYPE_SOFT_INTR:
@@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3867 3994
3868static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) 3995static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
3869{ 3996{
3870 u32 exit_intr_info = vmx->exit_intr_info; 3997 u32 exit_intr_info;
3998
3999 if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
4000 || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
4001 return;
4002
4003 vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
4004 exit_intr_info = vmx->exit_intr_info;
3871 4005
3872 /* Handle machine checks before interrupts are enabled */ 4006 /* Handle machine checks before interrupts are enabled */
3873 if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) 4007 if (is_machine_check(exit_intr_info))
3874 || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
3875 && is_machine_check(exit_intr_info)))
3876 kvm_machine_check(); 4008 kvm_machine_check();
3877 4009
3878 /* We need to handle NMIs before interrupts are enabled */ 4010 /* We need to handle NMIs before interrupts are enabled */
@@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
3886 4018
3887static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 4019static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
3888{ 4020{
3889 u32 exit_intr_info = vmx->exit_intr_info; 4021 u32 exit_intr_info;
3890 bool unblock_nmi; 4022 bool unblock_nmi;
3891 u8 vector; 4023 u8 vector;
3892 bool idtv_info_valid; 4024 bool idtv_info_valid;
@@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
3894 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; 4026 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3895 4027
3896 if (cpu_has_virtual_nmis()) { 4028 if (cpu_has_virtual_nmis()) {
4029 if (vmx->nmi_known_unmasked)
4030 return;
4031 /*
4032 * Can't use vmx->exit_intr_info since we're not sure what
4033 * the exit reason is.
4034 */
4035 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3897 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 4036 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
3898 vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 4037 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
3899 /* 4038 /*
@@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
3910 vector != DF_VECTOR && !idtv_info_valid) 4049 vector != DF_VECTOR && !idtv_info_valid)
3911 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 4050 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3912 GUEST_INTR_STATE_NMI); 4051 GUEST_INTR_STATE_NMI);
4052 else
4053 vmx->nmi_known_unmasked =
4054 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
4055 & GUEST_INTR_STATE_NMI);
3913 } else if (unlikely(vmx->soft_vnmi_blocked)) 4056 } else if (unlikely(vmx->soft_vnmi_blocked))
3914 vmx->vnmi_blocked_time += 4057 vmx->vnmi_blocked_time +=
3915 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); 4058 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
@@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
3946 * Clear bit "block by NMI" before VM entry if a NMI 4089 * Clear bit "block by NMI" before VM entry if a NMI
3947 * delivery faulted. 4090 * delivery faulted.
3948 */ 4091 */
3949 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 4092 vmx_set_nmi_mask(&vmx->vcpu, false);
3950 GUEST_INTR_STATE_NMI);
3951 break; 4093 break;
3952 case INTR_TYPE_SOFT_EXCEPTION: 4094 case INTR_TYPE_SOFT_EXCEPTION:
3953 vmx->vcpu.arch.event_exit_inst_len = 4095 vmx->vcpu.arch.event_exit_inst_len =
@@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
4124 ); 4266 );
4125 4267
4126 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) 4268 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
4269 | (1 << VCPU_EXREG_RFLAGS)
4270 | (1 << VCPU_EXREG_CPL)
4127 | (1 << VCPU_EXREG_PDPTR) 4271 | (1 << VCPU_EXREG_PDPTR)
4272 | (1 << VCPU_EXREG_SEGMENTS)
4128 | (1 << VCPU_EXREG_CR3)); 4273 | (1 << VCPU_EXREG_CR3));
4129 vcpu->arch.regs_dirty = 0; 4274 vcpu->arch.regs_dirty = 0;
4130 4275
@@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
4134 vmx->launched = 1; 4279 vmx->launched = 1;
4135 4280
4136 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 4281 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
4137 vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
4138 4282
4139 vmx_complete_atomic_exit(vmx); 4283 vmx_complete_atomic_exit(vmx);
4140 vmx_recover_nmi_blocking(vmx); 4284 vmx_recover_nmi_blocking(vmx);
@@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
4195 goto free_vcpu; 4339 goto free_vcpu;
4196 4340
4197 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 4341 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
4342 err = -ENOMEM;
4198 if (!vmx->guest_msrs) { 4343 if (!vmx->guest_msrs) {
4199 err = -ENOMEM;
4200 goto uninit_vcpu; 4344 goto uninit_vcpu;
4201 } 4345 }
4202 4346
@@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
4215 if (err) 4359 if (err)
4216 goto free_vmcs; 4360 goto free_vmcs;
4217 if (vm_need_virtualize_apic_accesses(kvm)) 4361 if (vm_need_virtualize_apic_accesses(kvm))
4218 if (alloc_apic_access_page(kvm) != 0) 4362 err = alloc_apic_access_page(kvm);
4363 if (err)
4219 goto free_vmcs; 4364 goto free_vmcs;
4220 4365
4221 if (enable_ept) { 4366 if (enable_ept) {
@@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
4368{ 4513{
4369} 4514}
4370 4515
4516static int vmx_check_intercept(struct kvm_vcpu *vcpu,
4517 struct x86_instruction_info *info,
4518 enum x86_intercept_stage stage)
4519{
4520 return X86EMUL_CONTINUE;
4521}
4522
4371static struct kvm_x86_ops vmx_x86_ops = { 4523static struct kvm_x86_ops vmx_x86_ops = {
4372 .cpu_has_kvm_support = cpu_has_kvm_support, 4524 .cpu_has_kvm_support = cpu_has_kvm_support,
4373 .disabled_by_bios = vmx_disabled_by_bios, 4525 .disabled_by_bios = vmx_disabled_by_bios,
@@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = {
4449 4601
4450 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 4602 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
4451 4603
4604 .set_tsc_khz = vmx_set_tsc_khz,
4452 .write_tsc_offset = vmx_write_tsc_offset, 4605 .write_tsc_offset = vmx_write_tsc_offset,
4453 .adjust_tsc_offset = vmx_adjust_tsc_offset, 4606 .adjust_tsc_offset = vmx_adjust_tsc_offset,
4607 .compute_tsc_offset = vmx_compute_tsc_offset,
4454 4608
4455 .set_tdp_cr3 = vmx_set_cr3, 4609 .set_tdp_cr3 = vmx_set_cr3,
4610
4611 .check_intercept = vmx_check_intercept,
4456}; 4612};
4457 4613
4458static int __init vmx_init(void) 4614static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 934b4c6b0bf9..77c9d8673dc4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -60,22 +60,12 @@
60#include <asm/div64.h> 60#include <asm/div64.h>
61 61
62#define MAX_IO_MSRS 256 62#define MAX_IO_MSRS 256
63#define CR0_RESERVED_BITS \
64 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
65 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
66 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
67#define CR4_RESERVED_BITS \
68 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
69 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
70 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
71 | X86_CR4_OSXSAVE \
72 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
73
74#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
75
76#define KVM_MAX_MCE_BANKS 32 63#define KVM_MAX_MCE_BANKS 32
77#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) 64#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
78 65
66#define emul_to_vcpu(ctxt) \
67 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
68
79/* EFER defaults: 69/* EFER defaults:
80 * - enable syscall per default because it's emulated by KVM 70 * - enable syscall per default because it's emulated by KVM
81 * - enable LME and LMA per default on 64 bit KVM 71 * - enable LME and LMA per default on 64 bit KVM
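emul_to_vcpu() underpins the emulator-interface rework in the rest of this file: every callback now receives only the x86_emulate_ctxt and recovers its vcpu with container_of(), which is valid because the ctxt is embedded in the vcpu's arch state. The converted callbacks below all share this shape (an illustrative composite, not a function from the patch):

static int emulator_example_op(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	return kvm_x86_ops->get_cpl(vcpu);	/* any per-vcpu operation */
}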
@@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
100int ignore_msrs = 0; 90int ignore_msrs = 0;
101module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 91module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
102 92
93bool kvm_has_tsc_control;
94EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
95u32 kvm_max_guest_tsc_khz;
96EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
97
103#define KVM_NR_SHARED_MSRS 16 98#define KVM_NR_SHARED_MSRS 16
104 99
105struct kvm_shared_msrs_global { 100struct kvm_shared_msrs_global {
@@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
157 152
158u64 __read_mostly host_xcr0; 153u64 __read_mostly host_xcr0;
159 154
155int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
156
160static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) 157static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
161{ 158{
162 int i; 159 int i;
@@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
361 358
362void kvm_inject_nmi(struct kvm_vcpu *vcpu) 359void kvm_inject_nmi(struct kvm_vcpu *vcpu)
363{ 360{
364 kvm_make_request(KVM_REQ_NMI, vcpu);
365 kvm_make_request(KVM_REQ_EVENT, vcpu); 361 kvm_make_request(KVM_REQ_EVENT, vcpu);
362 vcpu->arch.nmi_pending = 1;
366} 363}
367EXPORT_SYMBOL_GPL(kvm_inject_nmi); 364EXPORT_SYMBOL_GPL(kvm_inject_nmi);
368 365
@@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void)
982 return ret; 979 return ret;
983} 980}
984 981
985static inline u64 nsec_to_cycles(u64 nsec) 982static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
983{
984 if (vcpu->arch.virtual_tsc_khz)
985 return vcpu->arch.virtual_tsc_khz;
986 else
987 return __this_cpu_read(cpu_tsc_khz);
988}
989
990static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
986{ 991{
987 u64 ret; 992 u64 ret;
988 993
@@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec)
990 if (kvm_tsc_changes_freq()) 995 if (kvm_tsc_changes_freq())
991 printk_once(KERN_WARNING 996 printk_once(KERN_WARNING
992 "kvm: unreliable cycle conversion on adjustable rate TSC\n"); 997 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
993 ret = nsec * __this_cpu_read(cpu_tsc_khz); 998 ret = nsec * vcpu_tsc_khz(vcpu);
994 do_div(ret, USEC_PER_SEC); 999 do_div(ret, USEC_PER_SEC);
995 return ret; 1000 return ret;
996} 1001}
997 1002
998static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) 1003static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
999{ 1004{
1000 /* Compute a scale to convert nanoseconds in TSC cycles */ 1005 /* Compute a scale to convert nanoseconds in TSC cycles */
1001 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, 1006 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1002 &kvm->arch.virtual_tsc_shift, 1007 &vcpu->arch.tsc_catchup_shift,
1003 &kvm->arch.virtual_tsc_mult); 1008 &vcpu->arch.tsc_catchup_mult);
1004 kvm->arch.virtual_tsc_khz = this_tsc_khz;
1005} 1009}
1006 1010
1007static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) 1011static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1008{ 1012{
1009 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, 1013 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
1010 vcpu->kvm->arch.virtual_tsc_mult, 1014 vcpu->arch.tsc_catchup_mult,
1011 vcpu->kvm->arch.virtual_tsc_shift); 1015 vcpu->arch.tsc_catchup_shift);
1012 tsc += vcpu->arch.last_tsc_write; 1016 tsc += vcpu->arch.last_tsc_write;
1013 return tsc; 1017 return tsc;
1014} 1018}
@@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1021 s64 sdiff; 1025 s64 sdiff;
1022 1026
1023 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1027 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1024 offset = data - native_read_tsc(); 1028 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1025 ns = get_kernel_ns(); 1029 ns = get_kernel_ns();
1026 elapsed = ns - kvm->arch.last_tsc_nsec; 1030 elapsed = ns - kvm->arch.last_tsc_nsec;
1027 sdiff = data - kvm->arch.last_tsc_write; 1031 sdiff = data - kvm->arch.last_tsc_write;
@@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1037 * In that case, for a reliable TSC, we can match TSC offsets, 1041 * In that case, for a reliable TSC, we can match TSC offsets,
1038 * or make a best guess using the elapsed value. 1042 * or make a best guess using the elapsed value.
1039 */ 1043 */
1040 if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && 1044 if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
1041 elapsed < 5ULL * NSEC_PER_SEC) { 1045 elapsed < 5ULL * NSEC_PER_SEC) {
1042 if (!check_tsc_unstable()) { 1046 if (!check_tsc_unstable()) {
1043 offset = kvm->arch.last_tsc_offset; 1047 offset = kvm->arch.last_tsc_offset;
1044 pr_debug("kvm: matched tsc offset for %llu\n", data); 1048 pr_debug("kvm: matched tsc offset for %llu\n", data);
1045 } else { 1049 } else {
1046 u64 delta = nsec_to_cycles(elapsed); 1050 u64 delta = nsec_to_cycles(vcpu, elapsed);
1047 offset += delta; 1051 offset += delta;
1048 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1052 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1049 } 1053 }
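The 5-second window is expressed in guest cycles, so the comparison now scales with the vcpu's possibly virtualized TSC frequency. Working nsec_to_cycles() through for a concrete rate:

/*
 * Example: virtual_tsc_khz == 2500000, i.e. a 2.5 GHz guest TSC.
 *
 *   nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC)
 *     = 5000000000 * 2500000 / USEC_PER_SEC
 *     = 12500000000 cycles
 *
 * TSC writes within 12.5e9 guest cycles and 5 s of wall time of the
 * previous write are treated as writes of the same guest time.
 */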
@@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1075 local_irq_save(flags); 1079 local_irq_save(flags);
1076 kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); 1080 kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
1077 kernel_ns = get_kernel_ns(); 1081 kernel_ns = get_kernel_ns();
1078 this_tsc_khz = __this_cpu_read(cpu_tsc_khz); 1082 this_tsc_khz = vcpu_tsc_khz(v);
1079
1080 if (unlikely(this_tsc_khz == 0)) { 1083 if (unlikely(this_tsc_khz == 0)) {
1081 local_irq_restore(flags); 1084 local_irq_restore(flags);
1082 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); 1085 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1993 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1996 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1994 case KVM_CAP_XSAVE: 1997 case KVM_CAP_XSAVE:
1995 case KVM_CAP_ASYNC_PF: 1998 case KVM_CAP_ASYNC_PF:
1999 case KVM_CAP_GET_TSC_KHZ:
1996 r = 1; 2000 r = 1;
1997 break; 2001 break;
1998 case KVM_CAP_COALESCED_MMIO: 2002 case KVM_CAP_COALESCED_MMIO:
@@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext)
2019 case KVM_CAP_XCRS: 2023 case KVM_CAP_XCRS:
2020 r = cpu_has_xsave; 2024 r = cpu_has_xsave;
2021 break; 2025 break;
2026 case KVM_CAP_TSC_CONTROL:
2027 r = kvm_has_tsc_control;
2028 break;
2022 default: 2029 default:
2023 r = 0; 2030 r = 0;
2024 break; 2031 break;
@@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2120 kvm_x86_ops->vcpu_load(vcpu, cpu); 2127 kvm_x86_ops->vcpu_load(vcpu, cpu);
2121 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { 2128 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2122 /* Make sure TSC doesn't go backwards */ 2129 /* Make sure TSC doesn't go backwards */
2123 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : 2130 s64 tsc_delta;
2124 native_read_tsc() - vcpu->arch.last_host_tsc; 2131 u64 tsc;
2132
2133 kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
2134 tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
2135 tsc - vcpu->arch.last_guest_tsc;
2136
2125 if (tsc_delta < 0) 2137 if (tsc_delta < 0)
2126 mark_tsc_unstable("KVM discovered backwards TSC"); 2138 mark_tsc_unstable("KVM discovered backwards TSC");
2127 if (check_tsc_unstable()) { 2139 if (check_tsc_unstable()) {
@@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2139{ 2151{
2140 kvm_x86_ops->vcpu_put(vcpu); 2152 kvm_x86_ops->vcpu_put(vcpu);
2141 kvm_put_guest_fpu(vcpu); 2153 kvm_put_guest_fpu(vcpu);
2142 vcpu->arch.last_host_tsc = native_read_tsc(); 2154 kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
2143} 2155}
2144 2156
2145static int is_efer_nx(void) 2157static int is_efer_nx(void)
@@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2324 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 2336 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
2325 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); 2337 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
2326 2338
2339 /* cpuid 0xC0000001.edx */
2340 const u32 kvm_supported_word5_x86_features =
2341 F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
2342 F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
2343 F(PMM) | F(PMM_EN);
2344
2327 /* all calls to cpuid_count() should be made on the same cpu */ 2345 /* all calls to cpuid_count() should be made on the same cpu */
2328 get_cpu(); 2346 get_cpu();
2329 do_cpuid_1_ent(entry, function, index); 2347 do_cpuid_1_ent(entry, function, index);
@@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2418 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | 2436 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
2419 (1 << KVM_FEATURE_NOP_IO_DELAY) | 2437 (1 << KVM_FEATURE_NOP_IO_DELAY) |
2420 (1 << KVM_FEATURE_CLOCKSOURCE2) | 2438 (1 << KVM_FEATURE_CLOCKSOURCE2) |
2439 (1 << KVM_FEATURE_ASYNC_PF) |
2421 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); 2440 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
2422 entry->ebx = 0; 2441 entry->ebx = 0;
2423 entry->ecx = 0; 2442 entry->ecx = 0;
@@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2432 entry->ecx &= kvm_supported_word6_x86_features; 2451 entry->ecx &= kvm_supported_word6_x86_features;
2433 cpuid_mask(&entry->ecx, 6); 2452 cpuid_mask(&entry->ecx, 6);
2434 break; 2453 break;
2454 /* Add support for Centaur's CPUID instruction */
2455 case 0xC0000000:
2456 /* Only leaves up to 0xC0000004 are supported for now */
2457 entry->eax = min(entry->eax, 0xC0000004);
2458 break;
2459 case 0xC0000001:
2460 entry->edx &= kvm_supported_word5_x86_features;
2461 cpuid_mask(&entry->edx, 5);
2462 break;
2463 case 0xC0000002:
2464 case 0xC0000003:
2465 case 0xC0000004:
2466 /* Nothing to do for now; reserved for future use */
2467 break;
2435 } 2468 }
2436 2469
2437 kvm_x86_ops->set_supported_cpuid(function, entry); 2470 kvm_x86_ops->set_supported_cpuid(function, entry);
@@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
2478 if (nent >= cpuid->nent) 2511 if (nent >= cpuid->nent)
2479 goto out_free; 2512 goto out_free;
2480 2513
2514 /* Add support for Centaur's CPUID instruction. */
2515 if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
2516 do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
2517 &nent, cpuid->nent);
2518
2519 r = -E2BIG;
2520 if (nent >= cpuid->nent)
2521 goto out_free;
2522
2523 limit = cpuid_entries[nent - 1].eax;
2524 for (func = 0xC0000001;
2525 func <= limit && nent < cpuid->nent; ++func)
2526 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2527 &nent, cpuid->nent);
2528
2529 r = -E2BIG;
2530 if (nent >= cpuid->nent)
2531 goto out_free;
2532 }
2533
2481 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, 2534 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2482 cpuid->nent); 2535 cpuid->nent);
2483 2536
@@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
3046 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); 3099 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3047 break; 3100 break;
3048 } 3101 }
3102 case KVM_SET_TSC_KHZ: {
3103 u32 user_tsc_khz;
3104
3105 r = -EINVAL;
3106 if (!kvm_has_tsc_control)
3107 break;
3108
3109 user_tsc_khz = (u32)arg;
3110
3111 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3112 goto out;
3113
3114 kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
3115
3116 r = 0;
3117 goto out;
3118 }
3119 case KVM_GET_TSC_KHZ: {
3120 r = -EIO;
3121 if (check_tsc_unstable())
3122 goto out;
3123
3124 r = vcpu_tsc_khz(vcpu);
3125
3126 goto out;
3127 }
3049 default: 3128 default:
3050 r = -EINVAL; 3129 r = -EINVAL;
3051 } 3130 }
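From userspace, the two new ioctls pair with the KVM_CAP_TSC_CONTROL extension check added above. A hedged usage sketch (descriptor setup and detailed error handling elided):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: pin the guest-visible TSC rate when the host supports
 * hardware TSC scaling; returns the effective rate in kHz, or -1. */
static int pin_guest_tsc_khz(int sys_fd, int vcpu_fd, unsigned long khz)
{
	if (ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL) != 1)
		return -1;	/* no TSC scaling on this host */
	if (ioctl(vcpu_fd, KVM_SET_TSC_KHZ, khz) < 0)
		return -1;	/* rejected, e.g. khz >= kvm_max_guest_tsc_khz */
	/* fails with EIO when the host TSC is unstable */
	return ioctl(vcpu_fd, KVM_GET_TSC_KHZ);
}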
@@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void)
3595static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, 3674static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3596 const void *v) 3675 const void *v)
3597{ 3676{
3598 if (vcpu->arch.apic && 3677 int handled = 0;
3599 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3678 int n;
3600 return 0;
3601 3679
3602 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3680 do {
3681 n = min(len, 8);
3682 if (!(vcpu->arch.apic &&
3683 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3684 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3685 break;
3686 handled += n;
3687 addr += n;
3688 len -= n;
3689 v += n;
3690 } while (len);
3691
3692 return handled;
3603} 3693}
3604 3694
3605static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3695static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3606{ 3696{
3607 if (vcpu->arch.apic && 3697 int handled = 0;
3608 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3698 int n;
3609 return 0; 3699
3700 do {
3701 n = min(len, 8);
3702 if (!(vcpu->arch.apic &&
3703 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3704 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3705 break;
3706 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3707 handled += n;
3708 addr += n;
3709 len -= n;
3710 v += n;
3711 } while (len);
3610 3712
3611 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3713 return handled;
3612} 3714}
3613 3715
3614static void kvm_set_segment(struct kvm_vcpu *vcpu, 3716static void kvm_set_segment(struct kvm_vcpu *vcpu,
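vcpu_mmio_read() and vcpu_mmio_write() now report progress in bytes instead of succeeding or failing as a whole, walking the access in chunks of at most 8 bytes so an in-kernel device can satisfy part of it. A sketch of the new contract (the caller shown is illustrative; the real callers are the emulator paths below):

static void mmio_write_example(struct kvm_vcpu *vcpu, gpa_t gpa, void *val)
{
	/* e.g. returns 8 if only the first half hits an in-kernel device */
	int handled = vcpu_mmio_write(vcpu, gpa, 16, val);

	if (handled < 16) {
		/* forward bytes [handled, 16) to userspace as MMIO,
		 * starting at gpa + handled, as the emulator does below */
	}
}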
@@ -3703,37 +3805,43 @@ out:
3703} 3805}
3704 3806
3705/* used for instruction fetching */ 3807/* used for instruction fetching */
3706static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, 3808static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3707 struct kvm_vcpu *vcpu, 3809 gva_t addr, void *val, unsigned int bytes,
3708 struct x86_exception *exception) 3810 struct x86_exception *exception)
3709{ 3811{
3812 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3710 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3813 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3814
3711 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 3815 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3712 access | PFERR_FETCH_MASK, 3816 access | PFERR_FETCH_MASK,
3713 exception); 3817 exception);
3714} 3818}
3715 3819
3716static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3820static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
3717 struct kvm_vcpu *vcpu, 3821 gva_t addr, void *val, unsigned int bytes,
3718 struct x86_exception *exception) 3822 struct x86_exception *exception)
3719{ 3823{
3824 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3720 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3825 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3826
3721 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, 3827 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3722 exception); 3828 exception);
3723} 3829}
3724 3830
3725static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, 3831static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3726 struct kvm_vcpu *vcpu, 3832 gva_t addr, void *val, unsigned int bytes,
3727 struct x86_exception *exception) 3833 struct x86_exception *exception)
3728{ 3834{
3835 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3729 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); 3836 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
3730} 3837}
3731 3838
3732static int kvm_write_guest_virt_system(gva_t addr, void *val, 3839static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3840 gva_t addr, void *val,
3733 unsigned int bytes, 3841 unsigned int bytes,
3734 struct kvm_vcpu *vcpu,
3735 struct x86_exception *exception) 3842 struct x86_exception *exception)
3736{ 3843{
3844 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3737 void *data = val; 3845 void *data = val;
3738 int r = X86EMUL_CONTINUE; 3846 int r = X86EMUL_CONTINUE;
3739 3847
@@ -3761,13 +3869,15 @@ out:
3761 return r; 3869 return r;
3762} 3870}
3763 3871
3764static int emulator_read_emulated(unsigned long addr, 3872static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
3873 unsigned long addr,
3765 void *val, 3874 void *val,
3766 unsigned int bytes, 3875 unsigned int bytes,
3767 struct x86_exception *exception, 3876 struct x86_exception *exception)
3768 struct kvm_vcpu *vcpu)
3769{ 3877{
3878 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3770 gpa_t gpa; 3879 gpa_t gpa;
3880 int handled;
3771 3881
3772 if (vcpu->mmio_read_completed) { 3882 if (vcpu->mmio_read_completed) {
3773 memcpy(val, vcpu->mmio_data, bytes); 3883 memcpy(val, vcpu->mmio_data, bytes);
@@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr,
3786 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3896 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3787 goto mmio; 3897 goto mmio;
3788 3898
3789 if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) 3899 if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
3790 == X86EMUL_CONTINUE) 3900 == X86EMUL_CONTINUE)
3791 return X86EMUL_CONTINUE; 3901 return X86EMUL_CONTINUE;
3792 3902
@@ -3794,18 +3904,24 @@ mmio:
3794 /* 3904 /*
3795 * Is this MMIO handled locally? 3905 * Is this MMIO handled locally?
3796 */ 3906 */
3797 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { 3907 handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
3798 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); 3908
3909 if (handled == bytes)
3799 return X86EMUL_CONTINUE; 3910 return X86EMUL_CONTINUE;
3800 } 3911
3912 gpa += handled;
3913 bytes -= handled;
3914 val += handled;
3801 3915
3802 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3916 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3803 3917
3804 vcpu->mmio_needed = 1; 3918 vcpu->mmio_needed = 1;
3805 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3919 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3806 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3920 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3807 vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3921 vcpu->mmio_size = bytes;
3922 vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
3808 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; 3923 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
3924 vcpu->mmio_index = 0;
3809 3925
3810 return X86EMUL_IO_NEEDED; 3926 return X86EMUL_IO_NEEDED;
3811} 3927}
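Because run->mmio.len is now capped at 8 while mmio_size records the full width, a wide read completes across several userspace exits, with mmio_index tracking progress. The completion side lives outside this hunk, so the trace below is an assumed flow rather than code from the patch:

/*
 * Assumed flow for a 16-byte emulated read no in-kernel device claims:
 *
 *   exit 1: run->mmio.len == 8, mmio_index == 0; userspace fills data
 *   exit 2: run->mmio.len == 8, mmio_index == 8; userspace fills data
 *   resume: mmio_read_completed hands all 16 bytes back to the emulator
 */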
@@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
3829 struct kvm_vcpu *vcpu) 3945 struct kvm_vcpu *vcpu)
3830{ 3946{
3831 gpa_t gpa; 3947 gpa_t gpa;
3948 int handled;
3832 3949
3833 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); 3950 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
3834 3951
@@ -3847,25 +3964,35 @@ mmio:
3847 /* 3964 /*
3848 * Is this MMIO handled locally? 3965 * Is this MMIO handled locally?
3849 */ 3966 */
3850 if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) 3967 handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
3968 if (handled == bytes)
3851 return X86EMUL_CONTINUE; 3969 return X86EMUL_CONTINUE;
3852 3970
3971 gpa += handled;
3972 bytes -= handled;
3973 val += handled;
3974
3853 vcpu->mmio_needed = 1; 3975 vcpu->mmio_needed = 1;
3976 memcpy(vcpu->mmio_data, val, bytes);
3854 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3977 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3855 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3978 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3856 vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3979 vcpu->mmio_size = bytes;
3980 vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
3857 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; 3981 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
3858 memcpy(vcpu->run->mmio.data, val, bytes); 3982 memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
3983 vcpu->mmio_index = 0;
3859 3984
3860 return X86EMUL_CONTINUE; 3985 return X86EMUL_CONTINUE;
3861} 3986}
3862 3987
3863int emulator_write_emulated(unsigned long addr, 3988int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
3989 unsigned long addr,
3864 const void *val, 3990 const void *val,
3865 unsigned int bytes, 3991 unsigned int bytes,
3866 struct x86_exception *exception, 3992 struct x86_exception *exception)
3867 struct kvm_vcpu *vcpu)
3868{ 3993{
3994 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3995
3869 /* Crossing a page boundary? */ 3996 /* Crossing a page boundary? */
3870 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3997 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3871 int rc, now; 3998 int rc, now;
@@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr,
3893 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) 4020 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3894#endif 4021#endif
3895 4022
3896static int emulator_cmpxchg_emulated(unsigned long addr, 4023static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4024 unsigned long addr,
3897 const void *old, 4025 const void *old,
3898 const void *new, 4026 const void *new,
3899 unsigned int bytes, 4027 unsigned int bytes,
3900 struct x86_exception *exception, 4028 struct x86_exception *exception)
3901 struct kvm_vcpu *vcpu)
3902{ 4029{
4030 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3903 gpa_t gpa; 4031 gpa_t gpa;
3904 struct page *page; 4032 struct page *page;
3905 char *kaddr; 4033 char *kaddr;
@@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
3955emul_write: 4083emul_write:
3956 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 4084 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3957 4085
3958 return emulator_write_emulated(addr, new, bytes, exception, vcpu); 4086 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
3959} 4087}
3960 4088
3961static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 4089static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3974} 4102}
3975 4103
3976 4104
3977static int emulator_pio_in_emulated(int size, unsigned short port, void *val, 4105static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
3978 unsigned int count, struct kvm_vcpu *vcpu) 4106 int size, unsigned short port, void *val,
4107 unsigned int count)
3979{ 4108{
4109 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4110
3980 if (vcpu->arch.pio.count) 4111 if (vcpu->arch.pio.count)
3981 goto data_avail; 4112 goto data_avail;
3982 4113
@@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
 	return 0;
 }
 
-static int emulator_pio_out_emulated(int size, unsigned short port,
-				     const void *val, unsigned int count,
-				     struct kvm_vcpu *vcpu)
+static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
+				     int size, unsigned short port,
+				     const void *val, unsigned int count)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
 	trace_kvm_pio(1, port, size, count);
 
 	vcpu->arch.pio.port = port;
@@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 	return kvm_x86_ops->get_segment_base(vcpu, seg);
 }
 
-int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
+static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
 {
-	kvm_mmu_invlpg(vcpu, address);
-	return X86EMUL_CONTINUE;
+	kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
@@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
-int emulate_clts(struct kvm_vcpu *vcpu)
+static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
-	kvm_x86_ops->fpu_activate(vcpu);
-	return X86EMUL_CONTINUE;
+	kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-	return _kvm_get_dr(vcpu, dr, dest);
+	return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
+int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 {
 
-	return __kvm_set_dr(vcpu, dr, value);
+	return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
 }
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val)
 	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
 }
 
-static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	unsigned long value;
 
 	switch (cr) {
@@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
 	return value;
 }
 
-static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
+static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	int res = 0;
 
 	switch (cr) {
@@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
 	return res;
 }
 
-static int emulator_get_cpl(struct kvm_vcpu *vcpu)
+static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
+{
+	return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
+}
+
+static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+	kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
+}
+
+static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
 {
-	return kvm_x86_ops->get_cpl(vcpu);
+	kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
 }
 
-static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
 {
-	kvm_x86_ops->get_gdt(vcpu, dt);
+	kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
 }
 
-static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
 {
-	kvm_x86_ops->get_idt(vcpu, dt);
+	kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
 }
 
-static unsigned long emulator_get_cached_segment_base(int seg,
-						      struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cached_segment_base(
+	struct x86_emulate_ctxt *ctxt, int seg)
 {
-	return get_segment_base(vcpu, seg);
+	return get_segment_base(emul_to_vcpu(ctxt), seg);
 }
 
-static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
-					   int seg, struct kvm_vcpu *vcpu)
+static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
+				 struct desc_struct *desc, u32 *base3,
+				 int seg)
 {
 	struct kvm_segment var;
 
-	kvm_get_segment(vcpu, &var, seg);
+	kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
+	*selector = var.selector;
 
 	if (var.unusable)
 		return false;
@@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
 	return true;
 }
 
-static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
-					   int seg, struct kvm_vcpu *vcpu)
+static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
+				 struct desc_struct *desc, u32 base3,
+				 int seg)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	struct kvm_segment var;
 
-	/* needed to preserve selector */
-	kvm_get_segment(vcpu, &var, seg);
-
+	var.selector = selector;
 	var.base = get_desc_base(desc);
 #ifdef CONFIG_X86_64
 	var.base |= ((u64)base3) << 32;
@@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
 	return;
 }
 
-static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment kvm_seg;
-
-	kvm_get_segment(vcpu, &kvm_seg, seg);
-	return kvm_seg.selector;
-}
-
-static void emulator_set_segment_selector(u16 sel, int seg,
-					  struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment kvm_seg;
-
-	kvm_get_segment(vcpu, &kvm_seg, seg);
-	kvm_seg.selector = sel;
-	kvm_set_segment(vcpu, &kvm_seg, seg);
+static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
+			    u32 msr_index, u64 *pdata)
+{
+	return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+}
+
+static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
+			    u32 msr_index, u64 data)
+{
+	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+}
+
+static void emulator_halt(struct x86_emulate_ctxt *ctxt)
+{
+	emul_to_vcpu(ctxt)->arch.halt_request = 1;
+}
+
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+	preempt_disable();
+	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
+	/*
+	 * CR0.TS may reference the host fpu state, not the guest fpu state,
+	 * so it may be clear at this point.
+	 */
+	clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+	preempt_enable();
+}
+
+static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
+			      struct x86_instruction_info *info,
+			      enum x86_intercept_stage stage)
+{
+	return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
 static struct x86_emulate_ops emulate_ops = {
@@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = {
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
 	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
+	.invlpg              = emulator_invlpg,
 	.pio_in_emulated     = emulator_pio_in_emulated,
 	.pio_out_emulated    = emulator_pio_out_emulated,
-	.get_cached_descriptor = emulator_get_cached_descriptor,
-	.set_cached_descriptor = emulator_set_cached_descriptor,
-	.get_segment_selector = emulator_get_segment_selector,
-	.set_segment_selector = emulator_set_segment_selector,
+	.get_segment         = emulator_get_segment,
+	.set_segment         = emulator_set_segment,
 	.get_cached_segment_base = emulator_get_cached_segment_base,
 	.get_gdt             = emulator_get_gdt,
 	.get_idt             = emulator_get_idt,
+	.set_gdt             = emulator_set_gdt,
+	.set_idt             = emulator_set_idt,
 	.get_cr              = emulator_get_cr,
 	.set_cr              = emulator_set_cr,
 	.cpl                 = emulator_get_cpl,
 	.get_dr              = emulator_get_dr,
 	.set_dr              = emulator_set_dr,
-	.set_msr             = kvm_set_msr,
-	.get_msr             = kvm_get_msr,
+	.set_msr             = emulator_set_msr,
+	.get_msr             = emulator_get_msr,
+	.halt                = emulator_halt,
+	.wbinvd              = emulator_wbinvd,
+	.fix_hypercall       = emulator_fix_hypercall,
+	.get_fpu             = emulator_get_fpu,
+	.put_fpu             = emulator_put_fpu,
+	.intercept           = emulator_intercept,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
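Note: the emul_to_vcpu() helper used by every converted callback above is not visible in these hunks; elsewhere in this series it is defined, approximately, as a container_of() walk from the embedded emulation context back to the enclosing vcpu:

	#define emul_to_vcpu(ctxt) \
		container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

This is what lets the x86_emulate_ops callbacks drop their struct kvm_vcpu * parameter: since the context lives inside struct kvm_vcpu_arch, the ctxt pointer alone is enough to recover the vcpu, and emulate.c no longer needs to know about kvm_vcpu at all.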
@@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
 	int cs_db, cs_l;
 
+	/*
+	 * TODO: fix emulate.c to use guest_read/write_register
+	 * instead of direct ->regs accesses, can save hundreds of cycles
+	 * on Intel for instructions that don't read/change RSP, for
+	 * example.
+	 */
 	cache_all_regs(vcpu);
 
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
-	vcpu->arch.emulate_ctxt.vcpu = vcpu;
-	vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+	vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
 	vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
 	vcpu->arch.emulate_ctxt.mode =
 		(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
@@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 		  ? X86EMUL_MODE_VM86 : cs_l
 		  ? X86EMUL_MODE_PROT64 : cs_db
 		  ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+	vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
 	memset(c, 0, sizeof(struct decode_cache));
 	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
 	int ret;
@@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
 
 	vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
 	vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
-	vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip;
+	vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip +
+					     inc_eip;
 	ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
 
 	if (ret != X86EMUL_CONTINUE)
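The new inc_eip argument lets the caller choose where the injected frame's return address points: 0 re-executes the current instruction, a positive value skips past it. A hypothetical call site (the vector and instruction length here are illustrative only, not taken from this patch):

	/* push a return EIP that skips the 2-byte instruction being emulated */
	if (kvm_inject_realmode_interrupt(vcpu, irq, 2) != EMULATE_DONE)
		return;		/* injection failed to emulate; caller handles it */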
@@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
 	vcpu->arch.emulate_ctxt.eip = c->eip;
 	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
 	if (irq == NMI_VECTOR)
 		vcpu->arch.nmi_pending = false;
@@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 {
 	int r;
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+	bool writeback = true;
 
 	kvm_clear_exception_queue(vcpu);
-	vcpu->arch.mmio_fault_cr2 = cr2;
-	/*
-	 * TODO: fix emulate.c to use guest_read/write_register
-	 * instead of direct ->regs accesses, can save hundred cycles
-	 * on Intel for instructions that don't read/change RSP, for
-	 * for example.
-	 */
-	cache_all_regs(vcpu);
 
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
 		init_emulate_ctxt(vcpu);
@@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
 
-	/* this is needed for vmware backdor interface to work since it
+	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values during IO operation */
-	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
+		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
+		memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	}
 
 restart:
 	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
 
+	if (r == EMULATION_INTERCEPTED)
+		return EMULATE_DONE;
+
 	if (r == EMULATION_FAILED) {
 		if (reexecute_instruction(vcpu, cr2))
 			return EMULATE_DONE;
@@ -4462,21 +4642,28 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
+		else
+			writeback = false;
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
-		if (vcpu->mmio_is_write)
-			vcpu->mmio_needed = 0;
+		if (!vcpu->mmio_is_write)
+			writeback = false;
 		r = EMULATE_DO_MMIO;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
 		r = EMULATE_DONE;
 
-	toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
-	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+	if (writeback) {
+		toggle_interruptibility(vcpu,
+			vcpu->arch.emulate_ctxt.interruptibility);
+		kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+	} else
+		vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
 
 	return r;
 }
@@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction);
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 {
 	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
+	int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
+					    size, port, &val, 1);
 	/* do not return to emulator after return from userspace */
 	vcpu->arch.pio.count = 0;
 	return ret;
@@ -4879,8 +5067,9 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	char instruction[3];
 	unsigned long rip = kvm_rip_read(vcpu);
 
@@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
-	return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
-}
-
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-	struct desc_ptr dt = { limit, base };
-
-	kvm_x86_ops->set_gdt(vcpu, &dt);
-}
-
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-	struct desc_ptr dt = { limit, base };
-
-	kvm_x86_ops->set_idt(vcpu, &dt);
+	return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
+				       rip, instruction, 3, NULL);
 }
 
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
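For context, ->patch_hypercall() fills instruction[] with the vendor's 3-byte hypercall opcode, which emulator_write_emulated() then writes over the guest's faulting instruction through the normal emulated-write path. The VMX implementation looks roughly like this (a sketch; the AMD version patches in vmmcall, 0f 01 d9, instead):

	static void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
	{
		/* vmcall: 0f 01 c1 */
		hypercall[0] = 0x0f;
		hypercall[1] = 0x01;
		hypercall[2] = 0xc1;
	}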
@@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
+	bool nmi_pending;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
 
@@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 1;
 			goto out;
 		}
-		if (kvm_check_request(KVM_REQ_NMI, vcpu))
-			vcpu->arch.nmi_pending = true;
 	}
 
 	r = kvm_mmu_reload(vcpu);
 	if (unlikely(r))
 		goto out;
 
+	/*
+	 * An NMI can be injected between local nmi_pending read and
+	 * vcpu->arch.nmi_pending read inside inject_pending_event().
+	 * But in that case, KVM_REQ_EVENT will be set, which makes
+	 * the race described above benign.
+	 */
+	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
+
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 		inject_pending_event(vcpu);
 
 		/* enable NMI/IRQ window open exits if needed */
-		if (vcpu->arch.nmi_pending)
+		if (nmi_pending)
 			kvm_x86_ops->enable_nmi_window(vcpu);
 		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
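ACCESS_ONCE() forces a single, non-reorderable load of vcpu->arch.nmi_pending; in <linux/compiler.h> it is defined essentially as a volatile cast:

	#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

The volatile access keeps the compiler from re-reading the flag later in vcpu_enter_guest(), so the window-enabling decision and inject_pending_event() work from one snapshot; as the comment above notes, an NMI that arrives after the snapshot still sets KVM_REQ_EVENT, so the race stays benign.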
@@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static int complete_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int r;
+
+	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
+		return 1;
+
+	if (vcpu->mmio_needed) {
+		vcpu->mmio_needed = 0;
+		if (!vcpu->mmio_is_write)
+			memcpy(vcpu->mmio_data + vcpu->mmio_index,
+			       run->mmio.data, 8);
+		vcpu->mmio_index += 8;
+		if (vcpu->mmio_index < vcpu->mmio_size) {
+			run->exit_reason = KVM_EXIT_MMIO;
+			run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
+			memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
+			run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
+			run->mmio.is_write = vcpu->mmio_is_write;
+			vcpu->mmio_needed = 1;
+			return 0;
+		}
+		if (vcpu->mmio_is_write)
+			return 1;
+		vcpu->mmio_read_completed = 1;
+	}
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
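complete_mmio() is the consumer side of the chunking set up in emulator_write_emulated() above: the full value sits in vcpu->mmio_data, and each KVM_EXIT_MMIO round trip to userspace moves one window of at most 8 bytes, tracked by mmio_index. A trace of how a 16-byte guest read would proceed, as a sketch derived from the code above:

	/*
	 * exit 1:    run->mmio = { phys_addr = gpa, len = 8 }; userspace
	 *            fills run->mmio.data and calls KVM_RUN again.
	 * re-entry:  complete_mmio() copies bytes 0..7, mmio_index = 8;
	 *            8 < 16, so it stages exit 2 and returns 0.
	 * exit 2:    run->mmio = { phys_addr = gpa + 8, len = 8 }.
	 * re-entry:  bytes 8..15 copied, mmio_index = 16 = mmio_size, so
	 *            mmio_read_completed is set and the instruction is
	 *            re-run with EMULTYPE_NO_DECODE to consume the data.
	 */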
@@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	if (vcpu->arch.pio.count || vcpu->mmio_needed) {
-		if (vcpu->mmio_needed) {
-			memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
-			vcpu->mmio_read_completed = 1;
-			vcpu->mmio_needed = 0;
-		}
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-		r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-		if (r != EMULATE_DONE) {
-			r = 0;
-			goto out;
-		}
-	}
+	r = complete_mmio(vcpu);
+	if (r <= 0)
+		goto out;
+
 	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
 		kvm_register_write(vcpu, VCPU_REGS_RAX,
 				   kvm_run->hypercall.ret);
@@ -5455,6 +5663,18 @@ out:
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
+	if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
+		/*
+		 * We are here if userspace calls get_regs() in the middle of
+		 * instruction emulation. Registers state needs to be copied
+		 * back from emulation context to vcpu. Userspace shouldn't do
+		 * that usually, but some badly designed PV devices (vmware
+		 * backdoor interface) need this to work.
+		 */
+		struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+		memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+	}
 	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
 	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
+	vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
+	vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+
 	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
 	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
 	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
@@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
 
 	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return EMULATE_DONE;
 }
@@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.pio_data = page_address(page);
 
-	if (!kvm->arch.virtual_tsc_khz)
-		kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
+	kvm_init_tsc_catchup(vcpu, max_tsc_khz);
 
 	r = kvm_mmu_create(vcpu);
 	if (r < 0)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c600da830ce0..e407ed3df817 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -77,7 +77,7 @@ static inline u32 bit(int bitno)
 
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ea2dc1a2e13d..55ef181521ff 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
 #define KVM_CAP_ASYNC_PF 59
+#define KVM_CAP_TSC_CONTROL 60
+#define KVM_CAP_GET_TSC_KHZ 61
+#define KVM_CAP_PPC_BOOKE_SREGS 62
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -677,6 +680,9 @@ struct kvm_clock_data {
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
 /* Available with KVM_CAP_PPC_GET_PVINFO */
 #define KVM_PPC_GET_PVINFO        _IOW(KVMIO,  0xa1, struct kvm_ppc_pvinfo)
+/* Available with KVM_CAP_TSC_CONTROL */
+#define KVM_SET_TSC_KHZ           _IO(KVMIO,  0xa2)
+#define KVM_GET_TSC_KHZ           _IO(KVMIO,  0xa3)
 
 /*
  * ioctls for vcpu fds
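A minimal userspace sketch of the new TSC interface, assuming KVM_SET_TSC_KHZ is issued against the vcpu fd (the capability check is the standard KVM_CHECK_EXTENSION pattern on the /dev/kvm fd):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int set_vcpu_tsc_khz(int kvm_fd, int vcpu_fd, unsigned long khz)
	{
		/* only issue the ioctl when the kernel advertises it */
		if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL) <= 0)
			return -1;
		return ioctl(vcpu_fd, KVM_SET_TSC_KHZ, khz);	/* khz passed as the raw arg */
	}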
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ab428552af8e..b9c3299c6a55 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -27,6 +27,10 @@
 
 #include <asm/kvm_host.h>
 
+#ifndef KVM_MMIO_SIZE
+#define KVM_MMIO_SIZE 8
+#endif
+
 /*
  * vcpu->requests bit members
  */
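The #ifndef guard lets an architecture widen the per-vcpu MMIO staging buffer by defining KVM_MMIO_SIZE in its <asm/kvm_host.h> before this header is pulled in; x86 appears to do exactly that in this series so a single emulated instruction can move more than 8 bytes. A sketch of such an override:

	/* in an arch's <asm/kvm_host.h>, seen before <linux/kvm_host.h> */
	#define KVM_MMIO_SIZE 16	/* e.g. room for a 16-byte access */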
@@ -43,7 +47,6 @@
 #define KVM_REQ_DEACTIVATE_FPU    10
 #define KVM_REQ_EVENT             11
 #define KVM_REQ_APF_HALT          12
-#define KVM_REQ_NMI               13
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -133,7 +136,8 @@ struct kvm_vcpu {
 	int mmio_read_completed;
 	int mmio_is_write;
 	int mmio_size;
-	unsigned char mmio_data[8];
+	int mmio_index;
+	unsigned char mmio_data[KVM_MMIO_SIZE];
 	gpa_t mmio_phys_addr;
 #endif
 
@@ -292,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 }
 
 #define kvm_for_each_vcpu(idx, vcpup, kvm) \
-	for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
-	     idx < atomic_read(&kvm->online_vcpus) && vcpup; \
-	     vcpup = kvm_get_vcpu(kvm, ++idx))
+	for (idx = 0; \
+	     idx < atomic_read(&kvm->online_vcpus) && \
+	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
+	     idx++)
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
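The macro rewrite changes evaluation order, not usage: the old form called kvm_get_vcpu(kvm, 0) in the for-initializer, before the idx bound was ever tested, so it read the vcpus[] array even on a VM with no online vcpus; the new form checks the bound first and only then fetches the slot. Call sites keep the same shape, for example:

	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_make_request(KVM_REQ_EVENT, vcpu);	/* visits each live vcpu */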
@@ -365,7 +370,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
-int memslot_id(struct kvm *kvm, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
@@ -587,8 +591,17 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 
 static inline void kvm_guest_enter(void)
 {
+	BUG_ON(preemptible());
 	account_system_vtime(current);
 	current->flags |= PF_VCPU;
+	/* KVM does not hold any references to rcu protected data when it
+	 * switches CPU into a guest mode. In fact switching to a guest mode
+	 * is very similar to exiting to userspace from rcu point of view. In
+	 * addition CPU may stay in a guest mode for quite a long time (up to
+	 * one time slice). Let's treat guest mode as quiescent state, just like
+	 * we do with user-mode execution.
+	 */
+	rcu_virt_note_context_switch(smp_processor_id());
 }
 
 static inline void kvm_guest_exit(void)
@@ -597,6 +610,11 @@ static inline void kvm_guest_exit(void)
 	current->flags &= ~PF_VCPU;
 }
 
+static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn_to_memslot(kvm, gfn)->id;
+}
+
 static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
 					       gfn_t gfn)
 {
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 0b9df8303dcf..8df1ca104a7f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -167,7 +167,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
-		     entry->fields.dest, entry->fields.dest_mode,
+		     entry->fields.dest_id, entry->fields.dest_mode,
 		     entry->fields.delivery_mode, entry->fields.vector,
 		     entry->fields.trig_mode);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6330653480e4..22cdb960660a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -467,6 +467,7 @@ static struct kvm *kvm_create_vm(void)
 		if (!kvm->buses[i])
 			goto out_err;
 	}
+	spin_lock_init(&kvm->mmu_lock);
 
 	r = kvm_init_mmu_notifier(kvm);
 	if (r)
@@ -474,7 +475,6 @@ static struct kvm *kvm_create_vm(void)
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
-	spin_lock_init(&kvm->mmu_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
@@ -648,7 +648,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
-	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
+	/* We can read the guest memory with __xxx_user() later on. */
+	if (user_alloc &&
+	    ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+	     !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size)))
 		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;
@@ -996,23 +999,6 @@ out:
 	return size;
 }
 
-int memslot_id(struct kvm *kvm, gfn_t gfn)
-{
-	int i;
-	struct kvm_memslots *slots = kvm_memslots(kvm);
-	struct kvm_memory_slot *memslot = NULL;
-
-	for (i = 0; i < slots->nmemslots; ++i) {
-		memslot = &slots->memslots[i];
-
-		if (gfn >= memslot->base_gfn
-		    && gfn < memslot->base_gfn + memslot->npages)
-			break;
-	}
-
-	return memslot - slots->memslots;
-}
-
 static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
 				     gfn_t *nr_pages)
 {
@@ -1300,7 +1286,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	r = copy_from_user(data, (void __user *)addr + offset, len);
+	r = __copy_from_user(data, (void __user *)addr + offset, len);
 	if (r)
 		return -EFAULT;
 	return 0;
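The switch from copy_from_user() to __copy_from_user() drops the per-call access_ok() range check; that is only safe because __kvm_set_memory_region() (see the hunk above) now validates the whole userspace_addr range once, when the slot is registered. The pattern, as a minimal sketch with hypothetical helper names:

	static int slot_register(const void __user *buf, size_t len)
	{
		if (!access_ok(VERIFY_WRITE, buf, len))	/* validate once, up front */
			return -EFAULT;
		/* ... remember buf/len in the memslot ... */
		return 0;
	}

	static int slot_read(void *dst, const void __user *buf, size_t len)
	{
		/* hot path: range already vetted, so the recheck can be skipped */
		return __copy_from_user(dst, buf, len) ? -EFAULT : 0;
	}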