34 files changed, 3054 insertions, 1093 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9bef4e4cec50..42542eb802ca 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -175,7 +175,10 @@ Parameters: vcpu id (apic id on x86) | |||
175 | Returns: vcpu fd on success, -1 on error | 175 | Returns: vcpu fd on success, -1 on error |
176 | 176 | ||
177 | This API adds a vcpu to a virtual machine. The vcpu id is a small integer | 177 | This API adds a vcpu to a virtual machine. The vcpu id is a small integer |
178 | in the range [0, max_vcpus). | 178 | in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the |
179 | KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time. | ||
180 | If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 | ||
181 | cpus max. | ||
179 | 182 | ||
180 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) | 183 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) |
181 | 184 | ||
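The new text in 4.7 above tells userspace to probe KVM_CAP_NR_VCPUS before choosing vcpu ids. A minimal sketch of that probe, assuming an already-opened /dev/kvm fd and with error handling omitted, could look like this:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Probe max_vcpus as described in 4.7; fall back to the documented
 * default of 4 when the capability is absent. */
static int query_max_vcpus(void)
{
        int kvm_fd = open("/dev/kvm", O_RDWR);
        int n = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);

        return n > 0 ? n : 4;
}

Any vcpu id passed to KVM_CREATE_VCPU would then be kept below the returned value.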
@@ -261,7 +264,7 @@ See KVM_GET_REGS for the data structure. | |||
261 | 4.13 KVM_GET_SREGS | 264 | 4.13 KVM_GET_SREGS |
262 | 265 | ||
263 | Capability: basic | 266 | Capability: basic |
264 | Architectures: x86 | 267 | Architectures: x86, ppc |
265 | Type: vcpu ioctl | 268 | Type: vcpu ioctl |
266 | Parameters: struct kvm_sregs (out) | 269 | Parameters: struct kvm_sregs (out) |
267 | Returns: 0 on success, -1 on error | 270 | Returns: 0 on success, -1 on error |
@@ -279,6 +282,8 @@ struct kvm_sregs { | |||
279 | __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; | 282 | __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; |
280 | }; | 283 | }; |
281 | 284 | ||
285 | /* ppc -- see arch/powerpc/include/asm/kvm.h */ | ||
286 | |||
282 | interrupt_bitmap is a bitmap of pending external interrupts. At most | 287 | interrupt_bitmap is a bitmap of pending external interrupts. At most |
283 | one bit may be set. This interrupt has been acknowledged by the APIC | 288 | one bit may be set. This interrupt has been acknowledged by the APIC |
284 | but not yet injected into the cpu core. | 289 | but not yet injected into the cpu core. |
@@ -286,7 +291,7 @@ but not yet injected into the cpu core. | |||
286 | 4.14 KVM_SET_SREGS | 291 | 4.14 KVM_SET_SREGS |
287 | 292 | ||
288 | Capability: basic | 293 | Capability: basic |
289 | Architectures: x86 | 294 | Architectures: x86, ppc |
290 | Type: vcpu ioctl | 295 | Type: vcpu ioctl |
291 | Parameters: struct kvm_sregs (in) | 296 | Parameters: struct kvm_sregs (in) |
292 | Returns: 0 on success, -1 on error | 297 | Returns: 0 on success, -1 on error |
@@ -1263,6 +1268,29 @@ struct kvm_assigned_msix_entry { | |||
1263 | __u16 padding[3]; | 1268 | __u16 padding[3]; |
1264 | }; | 1269 | }; |
1265 | 1270 | ||
1271 | 4.54 KVM_SET_TSC_KHZ | ||
1272 | |||
1273 | Capability: KVM_CAP_TSC_CONTROL | ||
1274 | Architectures: x86 | ||
1275 | Type: vcpu ioctl | ||
1276 | Parameters: virtual tsc_khz | ||
1277 | Returns: 0 on success, -1 on error | ||
1278 | |||
1279 | Specifies the tsc frequency for the virtual machine. The unit of the | ||
1280 | frequency is KHz. | ||
1281 | |||
1282 | 4.55 KVM_GET_TSC_KHZ | ||
1283 | |||
1284 | Capability: KVM_CAP_GET_TSC_KHZ | ||
1285 | Architectures: x86 | ||
1286 | Type: vcpu ioctl | ||
1287 | Parameters: none | ||
1288 | Returns: virtual tsc-khz on success, negative value on error | ||
1289 | |||
1290 | Returns the tsc frequency of the guest. The unit of the return value is | ||
1291 | KHz. If the host has unstable tsc this ioctl returns -EIO instead as an | ||
1292 | error. | ||
1293 | |||
1266 | 5. The kvm_run structure | 1294 | 5. The kvm_run structure |
1267 | 1295 | ||
1268 | Application code obtains a pointer to the kvm_run structure by | 1296 | Application code obtains a pointer to the kvm_run structure by |
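As an illustration of the two ioctls documented in 4.54 and 4.55 above, here is a hedged userspace sketch; it assumes vcpu_fd is a vcpu file descriptor on a host that reports KVM_CAP_TSC_CONTROL and KVM_CAP_GET_TSC_KHZ, and error handling is omitted:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Pin the guest's virtual TSC to tsc_khz (in KHz) and read it back. */
static long set_and_check_tsc_khz(int vcpu_fd, unsigned long tsc_khz)
{
        ioctl(vcpu_fd, KVM_SET_TSC_KHZ, tsc_khz);

        /* Returns the virtual tsc-khz, or a negative value such as
         * -EIO when the host TSC is unstable. */
        return ioctl(vcpu_fd, KVM_GET_TSC_KHZ, 0);
}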
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
index f6c5617e16af..b214b5b0432d 100644
--- a/arch/ia64/kvm/vti.h
+++ b/arch/ia64/kvm/vti.h
@@ -83,13 +83,13 @@ | |||
83 | union vac { | 83 | union vac { |
84 | unsigned long value; | 84 | unsigned long value; |
85 | struct { | 85 | struct { |
86 | int a_int:1; | 86 | unsigned int a_int:1; |
87 | int a_from_int_cr:1; | 87 | unsigned int a_from_int_cr:1; |
88 | int a_to_int_cr:1; | 88 | unsigned int a_to_int_cr:1; |
89 | int a_from_psr:1; | 89 | unsigned int a_from_psr:1; |
90 | int a_from_cpuid:1; | 90 | unsigned int a_from_cpuid:1; |
91 | int a_cover:1; | 91 | unsigned int a_cover:1; |
92 | int a_bsw:1; | 92 | unsigned int a_bsw:1; |
93 | long reserved:57; | 93 | long reserved:57; |
94 | }; | 94 | }; |
95 | }; | 95 | }; |
@@ -97,12 +97,12 @@ union vac { | |||
97 | union vdc { | 97 | union vdc { |
98 | unsigned long value; | 98 | unsigned long value; |
99 | struct { | 99 | struct { |
100 | int d_vmsw:1; | 100 | unsigned int d_vmsw:1; |
101 | int d_extint:1; | 101 | unsigned int d_extint:1; |
102 | int d_ibr_dbr:1; | 102 | unsigned int d_ibr_dbr:1; |
103 | int d_pmc:1; | 103 | unsigned int d_pmc:1; |
104 | int d_to_pmd:1; | 104 | unsigned int d_to_pmd:1; |
105 | int d_itm:1; | 105 | unsigned int d_itm:1; |
106 | long reserved:58; | 106 | long reserved:58; |
107 | }; | 107 | }; |
108 | }; | 108 | }; |
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 18ea6963ad77..d2ca5ed3877b 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -45,6 +45,114 @@ struct kvm_regs { | |||
45 | __u64 gpr[32]; | 45 | __u64 gpr[32]; |
46 | }; | 46 | }; |
47 | 47 | ||
48 | #define KVM_SREGS_E_IMPL_NONE 0 | ||
49 | #define KVM_SREGS_E_IMPL_FSL 1 | ||
50 | |||
51 | #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ | ||
52 | |||
53 | /* | ||
54 | * Feature bits indicate which sections of the sregs struct are valid, | ||
55 | * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers | ||
56 | * corresponding to unset feature bits will not be modified. This allows | ||
57 | * restoring a checkpoint made without that feature, while keeping the | ||
58 | * default values of the new registers. | ||
59 | * | ||
60 | * KVM_SREGS_E_BASE contains: | ||
61 | * CSRR0/1 (refers to SRR2/3 on 40x) | ||
62 | * ESR | ||
63 | * DEAR | ||
64 | * MCSR | ||
65 | * TSR | ||
66 | * TCR | ||
67 | * DEC | ||
68 | * TB | ||
69 | * VRSAVE (USPRG0) | ||
70 | */ | ||
71 | #define KVM_SREGS_E_BASE (1 << 0) | ||
72 | |||
73 | /* | ||
74 | * KVM_SREGS_E_ARCH206 contains: | ||
75 | * | ||
76 | * PIR | ||
77 | * MCSRR0/1 | ||
78 | * DECAR | ||
79 | * IVPR | ||
80 | */ | ||
81 | #define KVM_SREGS_E_ARCH206 (1 << 1) | ||
82 | |||
83 | /* | ||
84 | * Contains EPCR, plus the upper half of 64-bit registers | ||
85 | * that are 32-bit on 32-bit implementations. | ||
86 | */ | ||
87 | #define KVM_SREGS_E_64 (1 << 2) | ||
88 | |||
89 | #define KVM_SREGS_E_SPRG8 (1 << 3) | ||
90 | #define KVM_SREGS_E_MCIVPR (1 << 4) | ||
91 | |||
92 | /* | ||
93 | * IVORs are used -- contains IVOR0-15, plus additional IVORs | ||
94 | * in combination with an appropriate feature bit. | ||
95 | */ | ||
96 | #define KVM_SREGS_E_IVOR (1 << 5) | ||
97 | |||
98 | /* | ||
99 | * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. | ||
100 | * Also TLBnPS if MMUCFG[MAVN] = 1. | ||
101 | */ | ||
102 | #define KVM_SREGS_E_ARCH206_MMU (1 << 6) | ||
103 | |||
104 | /* DBSR, DBCR, IAC, DAC, DVC */ | ||
105 | #define KVM_SREGS_E_DEBUG (1 << 7) | ||
106 | |||
107 | /* Enhanced debug -- DSRR0/1, SPRG9 */ | ||
108 | #define KVM_SREGS_E_ED (1 << 8) | ||
109 | |||
110 | /* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ | ||
111 | #define KVM_SREGS_E_SPE (1 << 9) | ||
112 | |||
113 | /* External Proxy (EXP) -- EPR */ | ||
114 | #define KVM_SREGS_EXP (1 << 10) | ||
115 | |||
116 | /* External PID (E.PD) -- EPSC/EPLC */ | ||
117 | #define KVM_SREGS_E_PD (1 << 11) | ||
118 | |||
119 | /* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ | ||
120 | #define KVM_SREGS_E_PC (1 << 12) | ||
121 | |||
122 | /* Page table (E.PT) -- EPTCFG */ | ||
123 | #define KVM_SREGS_E_PT (1 << 13) | ||
124 | |||
125 | /* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ | ||
126 | #define KVM_SREGS_E_PM (1 << 14) | ||
127 | |||
128 | /* | ||
129 | * Special updates: | ||
130 | * | ||
131 | * Some registers may change even while a vcpu is not running. | ||
132 | * To avoid losing these changes, by default these registers are | ||
133 | * not updated by KVM_SET_SREGS. To force an update, set the bit | ||
134 | * in u.e.update_special corresponding to the register to be updated. | ||
135 | * | ||
136 | * The update_special field is zero on return from KVM_GET_SREGS. | ||
137 | * | ||
138 | * When restoring a checkpoint, the caller can set update_special | ||
139 | * to 0xffffffff to ensure that everything is restored, even new features | ||
140 | * that the caller doesn't know about. | ||
141 | */ | ||
142 | #define KVM_SREGS_E_UPDATE_MCSR (1 << 0) | ||
143 | #define KVM_SREGS_E_UPDATE_TSR (1 << 1) | ||
144 | #define KVM_SREGS_E_UPDATE_DEC (1 << 2) | ||
145 | #define KVM_SREGS_E_UPDATE_DBSR (1 << 3) | ||
146 | |||
147 | /* | ||
148 | * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a | ||
149 | * previous KVM_GET_REGS. | ||
150 | * | ||
151 | * Unless otherwise indicated, setting any register with KVM_SET_SREGS | ||
152 | * directly sets its value. It does not trigger any special semantics such | ||
153 | * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct | ||
154 | * just received from KVM_GET_SREGS is always a no-op. | ||
155 | */ | ||
48 | struct kvm_sregs { | 156 | struct kvm_sregs { |
49 | __u32 pvr; | 157 | __u32 pvr; |
50 | union { | 158 | union { |
@@ -62,6 +170,82 @@ struct kvm_sregs { | |||
62 | __u64 dbat[8]; | 170 | __u64 dbat[8]; |
63 | } ppc32; | 171 | } ppc32; |
64 | } s; | 172 | } s; |
173 | struct { | ||
174 | union { | ||
175 | struct { /* KVM_SREGS_E_IMPL_FSL */ | ||
176 | __u32 features; /* KVM_SREGS_E_FSL_ */ | ||
177 | __u32 svr; | ||
178 | __u64 mcar; | ||
179 | __u32 hid0; | ||
180 | |||
181 | /* KVM_SREGS_E_FSL_PIDn */ | ||
182 | __u32 pid1, pid2; | ||
183 | } fsl; | ||
184 | __u8 pad[256]; | ||
185 | } impl; | ||
186 | |||
187 | __u32 features; /* KVM_SREGS_E_ */ | ||
188 | __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ | ||
189 | __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ | ||
190 | __u32 pir; /* read-only */ | ||
191 | __u64 sprg8; | ||
192 | __u64 sprg9; /* E.ED */ | ||
193 | __u64 csrr0; | ||
194 | __u64 dsrr0; /* E.ED */ | ||
195 | __u64 mcsrr0; | ||
196 | __u32 csrr1; | ||
197 | __u32 dsrr1; /* E.ED */ | ||
198 | __u32 mcsrr1; | ||
199 | __u32 esr; | ||
200 | __u64 dear; | ||
201 | __u64 ivpr; | ||
202 | __u64 mcivpr; | ||
203 | __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ | ||
204 | |||
205 | __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ | ||
206 | __u32 tcr; | ||
207 | __u32 decar; | ||
208 | __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ | ||
209 | |||
210 | /* | ||
211 | * Userspace can read TB directly, but the | ||
212 | * value reported here is consistent with "dec". | ||
213 | * | ||
214 | * Read-only. | ||
215 | */ | ||
216 | __u64 tb; | ||
217 | |||
218 | __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ | ||
219 | __u32 dbcr[3]; | ||
220 | __u32 iac[4]; | ||
221 | __u32 dac[2]; | ||
222 | __u32 dvc[2]; | ||
223 | __u8 num_iac; /* read-only */ | ||
224 | __u8 num_dac; /* read-only */ | ||
225 | __u8 num_dvc; /* read-only */ | ||
226 | __u8 pad; | ||
227 | |||
228 | __u32 epr; /* EXP */ | ||
229 | __u32 vrsave; /* a.k.a. USPRG0 */ | ||
230 | __u32 epcr; /* KVM_SREGS_E_64 */ | ||
231 | |||
232 | __u32 mas0; | ||
233 | __u32 mas1; | ||
234 | __u64 mas2; | ||
235 | __u64 mas7_3; | ||
236 | __u32 mas4; | ||
237 | __u32 mas6; | ||
238 | |||
239 | __u32 ivor_low[16]; /* IVOR0-15 */ | ||
240 | __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ | ||
241 | |||
242 | __u32 mmucfg; /* read-only */ | ||
243 | __u32 eptcfg; /* E.PT, read-only */ | ||
244 | __u32 tlbcfg[4];/* read-only */ | ||
245 | __u32 tlbps[4]; /* read-only */ | ||
246 | |||
247 | __u32 eplc, epsc; /* E.PD */ | ||
248 | } e; | ||
65 | __u8 pad[1020]; | 249 | __u8 pad[1020]; |
66 | } u; | 250 | } u; |
67 | }; | 251 | }; |
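The feature-bit and update_special comments added above describe how a saved register image is meant to be restored. A minimal userspace sketch of that flow, assuming a kernel with this patch, a BookE vcpu fd in vcpu_fd, and a kvm_sregs image captured earlier with KVM_GET_SREGS (error handling omitted):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int restore_booke_sregs(int vcpu_fd, const struct kvm_sregs *saved)
{
        /* 'saved' came from a previous KVM_GET_SREGS, so its
         * reserved/pad fields are already in the expected state. */
        struct kvm_sregs sregs = *saved;

        /* Force the "special" registers (MCSR, TSR, DEC, DBSR) to be
         * written as well, including any added by newer kernels. */
        sregs.u.e.update_special = 0xffffffff;

        return ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
}

Registers whose feature bits are clear in saved->u.e.features are left untouched by the kernel, which is what allows an older checkpoint to be restored on a newer kernel.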
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index d22d39942a92..a0e57618ff33 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) | |||
61 | return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); | 61 | return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); |
62 | } | 62 | } |
63 | 63 | ||
64 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); | ||
65 | void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); | 64 | void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); |
66 | void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); | 65 | void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); |
67 | 66 | ||
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 7fea26fffb25..7a2a565f88c4 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 { | |||
43 | 43 | ||
44 | u32 host_pid[E500_PID_NUM]; | 44 | u32 host_pid[E500_PID_NUM]; |
45 | u32 pid[E500_PID_NUM]; | 45 | u32 pid[E500_PID_NUM]; |
46 | u32 svr; | ||
46 | 47 | ||
47 | u32 mas0; | 48 | u32 mas0; |
48 | u32 mas1; | 49 | u32 mas1; |
@@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 { | |||
58 | u32 hid1; | 59 | u32 hid1; |
59 | u32 tlb0cfg; | 60 | u32 tlb0cfg; |
60 | u32 tlb1cfg; | 61 | u32 tlb1cfg; |
62 | u64 mcar; | ||
61 | 63 | ||
62 | struct kvm_vcpu vcpu; | 64 | struct kvm_vcpu vcpu; |
63 | }; | 65 | }; |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bba3b9b72a39..186f150b9b89 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -223,6 +223,7 @@ struct kvm_vcpu_arch { | |||
223 | ulong hflags; | 223 | ulong hflags; |
224 | ulong guest_owned_ext; | 224 | ulong guest_owned_ext; |
225 | #endif | 225 | #endif |
226 | u32 vrsave; /* also USPRG0 */ | ||
226 | u32 mmucr; | 227 | u32 mmucr; |
227 | ulong sprg4; | 228 | ulong sprg4; |
228 | ulong sprg5; | 229 | ulong sprg5; |
@@ -232,6 +233,9 @@ struct kvm_vcpu_arch { | |||
232 | ulong csrr1; | 233 | ulong csrr1; |
233 | ulong dsrr0; | 234 | ulong dsrr0; |
234 | ulong dsrr1; | 235 | ulong dsrr1; |
236 | ulong mcsrr0; | ||
237 | ulong mcsrr1; | ||
238 | ulong mcsr; | ||
235 | ulong esr; | 239 | ulong esr; |
236 | u32 dec; | 240 | u32 dec; |
237 | u32 decar; | 241 | u32 decar; |
@@ -255,6 +259,7 @@ struct kvm_vcpu_arch { | |||
255 | u32 dbsr; | 259 | u32 dbsr; |
256 | 260 | ||
257 | #ifdef CONFIG_KVM_EXIT_TIMING | 261 | #ifdef CONFIG_KVM_EXIT_TIMING |
262 | struct mutex exit_timing_lock; | ||
258 | struct kvmppc_exit_timing timing_exit; | 263 | struct kvmppc_exit_timing timing_exit; |
259 | struct kvmppc_exit_timing timing_last_enter; | 264 | struct kvmppc_exit_timing timing_last_enter; |
260 | u32 last_exit_type; | 265 | u32 last_exit_type; |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ecb3bc74c344..9345238edecf 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, | |||
61 | struct kvm_vcpu *vcpu); | 61 | struct kvm_vcpu *vcpu); |
62 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); | 62 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); |
63 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); | 63 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); |
64 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); | ||
64 | 65 | ||
65 | /* Core-specific hooks */ | 66 | /* Core-specific hooks */ |
66 | 67 | ||
@@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) | |||
142 | return r; | 143 | return r; |
143 | } | 144 | } |
144 | 145 | ||
146 | void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | ||
147 | int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | ||
148 | |||
149 | void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | ||
150 | int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | ||
151 | |||
152 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); | ||
153 | |||
145 | #endif /* __POWERPC_KVM_PPC_H__ */ | 154 | #endif /* __POWERPC_KVM_PPC_H__ */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6887661ac072..36e1c8a29be8 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -396,6 +396,7 @@ int main(void) | |||
396 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); | 396 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); |
397 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); | 397 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); |
398 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); | 398 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); |
399 | DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); | ||
399 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); | 400 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); |
400 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); | 401 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); |
401 | DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); | 402 | DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); |
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 74d0e7421143..da3a1225c0ac 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, | |||
107 | return 0; | 107 | return 0; |
108 | } | 108 | } |
109 | 109 | ||
110 | void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||
111 | { | ||
112 | kvmppc_get_sregs_ivor(vcpu, sregs); | ||
113 | } | ||
114 | |||
115 | int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||
116 | { | ||
117 | return kvmppc_set_sregs_ivor(vcpu, sregs); | ||
118 | } | ||
119 | |||
110 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 120 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
111 | { | 121 | { |
112 | struct kvmppc_vcpu_44x *vcpu_44x; | 122 | struct kvmppc_vcpu_44x *vcpu_44x; |
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 65ea083a5b27..549bb2c9a47a 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
158 | emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); | 158 | emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); |
159 | } | 159 | } |
160 | 160 | ||
161 | kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); | ||
162 | return emulated; | 161 | return emulated; |
163 | } | 162 | } |
164 | 163 | ||
@@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
179 | emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); | 178 | emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); |
180 | } | 179 | } |
181 | 180 | ||
182 | kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); | ||
183 | return emulated; | 181 | return emulated; |
184 | } | 182 | } |
185 | 183 | ||
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ef76acb455c3..8462b3a1c1c7 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
569 | kvmppc_set_msr(vcpu, regs->msr); | 569 | kvmppc_set_msr(vcpu, regs->msr); |
570 | vcpu->arch.shared->srr0 = regs->srr0; | 570 | vcpu->arch.shared->srr0 = regs->srr0; |
571 | vcpu->arch.shared->srr1 = regs->srr1; | 571 | vcpu->arch.shared->srr1 = regs->srr1; |
572 | kvmppc_set_pid(vcpu, regs->pid); | ||
572 | vcpu->arch.shared->sprg0 = regs->sprg0; | 573 | vcpu->arch.shared->sprg0 = regs->sprg0; |
573 | vcpu->arch.shared->sprg1 = regs->sprg1; | 574 | vcpu->arch.shared->sprg1 = regs->sprg1; |
574 | vcpu->arch.shared->sprg2 = regs->sprg2; | 575 | vcpu->arch.shared->sprg2 = regs->sprg2; |
@@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
584 | return 0; | 585 | return 0; |
585 | } | 586 | } |
586 | 587 | ||
588 | static void get_sregs_base(struct kvm_vcpu *vcpu, | ||
589 | struct kvm_sregs *sregs) | ||
590 | { | ||
591 | u64 tb = get_tb(); | ||
592 | |||
593 | sregs->u.e.features |= KVM_SREGS_E_BASE; | ||
594 | |||
595 | sregs->u.e.csrr0 = vcpu->arch.csrr0; | ||
596 | sregs->u.e.csrr1 = vcpu->arch.csrr1; | ||
597 | sregs->u.e.mcsr = vcpu->arch.mcsr; | ||
598 | sregs->u.e.esr = vcpu->arch.esr; | ||
599 | sregs->u.e.dear = vcpu->arch.shared->dar; | ||
600 | sregs->u.e.tsr = vcpu->arch.tsr; | ||
601 | sregs->u.e.tcr = vcpu->arch.tcr; | ||
602 | sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); | ||
603 | sregs->u.e.tb = tb; | ||
604 | sregs->u.e.vrsave = vcpu->arch.vrsave; | ||
605 | } | ||
606 | |||
607 | static int set_sregs_base(struct kvm_vcpu *vcpu, | ||
608 | struct kvm_sregs *sregs) | ||
609 | { | ||
610 | if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) | ||
611 | return 0; | ||
612 | |||
613 | vcpu->arch.csrr0 = sregs->u.e.csrr0; | ||
614 | vcpu->arch.csrr1 = sregs->u.e.csrr1; | ||
615 | vcpu->arch.mcsr = sregs->u.e.mcsr; | ||
616 | vcpu->arch.esr = sregs->u.e.esr; | ||
617 | vcpu->arch.shared->dar = sregs->u.e.dear; | ||
618 | vcpu->arch.vrsave = sregs->u.e.vrsave; | ||
619 | vcpu->arch.tcr = sregs->u.e.tcr; | ||
620 | |||
621 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) | ||
622 | vcpu->arch.dec = sregs->u.e.dec; | ||
623 | |||
624 | kvmppc_emulate_dec(vcpu); | ||
625 | |||
626 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { | ||
627 | /* | ||
628 | * FIXME: existing KVM timer handling is incomplete. | ||
629 | * TSR cannot be read by the guest, and its value in | ||
630 | * vcpu->arch is always zero. For now, just handle | ||
631 | * the case where the caller is trying to inject a | ||
632 | * decrementer interrupt. | ||
633 | */ | ||
634 | |||
635 | if ((sregs->u.e.tsr & TSR_DIS) && | ||
636 | (vcpu->arch.tcr & TCR_DIE)) | ||
637 | kvmppc_core_queue_dec(vcpu); | ||
638 | } | ||
639 | |||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | static void get_sregs_arch206(struct kvm_vcpu *vcpu, | ||
644 | struct kvm_sregs *sregs) | ||
645 | { | ||
646 | sregs->u.e.features |= KVM_SREGS_E_ARCH206; | ||
647 | |||
648 | sregs->u.e.pir = 0; | ||
649 | sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; | ||
650 | sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; | ||
651 | sregs->u.e.decar = vcpu->arch.decar; | ||
652 | sregs->u.e.ivpr = vcpu->arch.ivpr; | ||
653 | } | ||
654 | |||
655 | static int set_sregs_arch206(struct kvm_vcpu *vcpu, | ||
656 | struct kvm_sregs *sregs) | ||
657 | { | ||
658 | if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) | ||
659 | return 0; | ||
660 | |||
661 | if (sregs->u.e.pir != 0) | ||
662 | return -EINVAL; | ||
663 | |||
664 | vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; | ||
665 | vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; | ||
666 | vcpu->arch.decar = sregs->u.e.decar; | ||
667 | vcpu->arch.ivpr = sregs->u.e.ivpr; | ||
668 | |||
669 | return 0; | ||
670 | } | ||
671 | |||
672 | void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||
673 | { | ||
674 | sregs->u.e.features |= KVM_SREGS_E_IVOR; | ||
675 | |||
676 | sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; | ||
677 | sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; | ||
678 | sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; | ||
679 | sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; | ||
680 | sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; | ||
681 | sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; | ||
682 | sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; | ||
683 | sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; | ||
684 | sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; | ||
685 | sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; | ||
686 | sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; | ||
687 | sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; | ||
688 | sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; | ||
689 | sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; | ||
690 | sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; | ||
691 | sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; | ||
692 | } | ||
693 | |||
694 | int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||
695 | { | ||
696 | if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) | ||
697 | return 0; | ||
698 | |||
699 | vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; | ||
700 | vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; | ||
701 | vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; | ||
702 | vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; | ||
703 | vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; | ||
704 | vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; | ||
705 | vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; | ||
706 | vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; | ||
707 | vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; | ||
708 | vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; | ||
709 | vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; | ||
710 | vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; | ||
711 | vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; | ||
712 | vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; | ||
713 | vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; | ||
714 | vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; | ||
715 | |||
716 | return 0; | ||
717 | } | ||
718 | |||
587 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 719 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
588 | struct kvm_sregs *sregs) | 720 | struct kvm_sregs *sregs) |
589 | { | 721 | { |
590 | return -ENOTSUPP; | 722 | sregs->pvr = vcpu->arch.pvr; |
723 | |||
724 | get_sregs_base(vcpu, sregs); | ||
725 | get_sregs_arch206(vcpu, sregs); | ||
726 | kvmppc_core_get_sregs(vcpu, sregs); | ||
727 | return 0; | ||
591 | } | 728 | } |
592 | 729 | ||
593 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 730 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
594 | struct kvm_sregs *sregs) | 731 | struct kvm_sregs *sregs) |
595 | { | 732 | { |
596 | return -ENOTSUPP; | 733 | int ret; |
734 | |||
735 | if (vcpu->arch.pvr != sregs->pvr) | ||
736 | return -EINVAL; | ||
737 | |||
738 | ret = set_sregs_base(vcpu, sregs); | ||
739 | if (ret < 0) | ||
740 | return ret; | ||
741 | |||
742 | ret = set_sregs_arch206(vcpu, sregs); | ||
743 | if (ret < 0) | ||
744 | return ret; | ||
745 | |||
746 | return kvmppc_core_set_sregs(vcpu, sregs); | ||
597 | } | 747 | } |
598 | 748 | ||
599 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 749 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 1cc471faac2d..b58ccae95904 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -380,7 +380,6 @@ lightweight_exit: | |||
380 | * because host interrupt handlers would get confused. */ | 380 | * because host interrupt handlers would get confused. */ |
381 | lwz r1, VCPU_GPR(r1)(r4) | 381 | lwz r1, VCPU_GPR(r1)(r4) |
382 | 382 | ||
383 | /* XXX handle USPRG0 */ | ||
384 | /* Host interrupt handlers may have clobbered these guest-readable | 383 | /* Host interrupt handlers may have clobbered these guest-readable |
385 | * SPRGs, so we need to reload them here with the guest's values. */ | 384 | * SPRGs, so we need to reload them here with the guest's values. */ |
386 | lwz r3, VCPU_SPRG4(r4) | 385 | lwz r3, VCPU_SPRG4(r4) |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e3768ee9b595..318dbc61ba44 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
63 | 63 | ||
64 | /* Registers init */ | 64 | /* Registers init */ |
65 | vcpu->arch.pvr = mfspr(SPRN_PVR); | 65 | vcpu->arch.pvr = mfspr(SPRN_PVR); |
66 | vcpu_e500->svr = mfspr(SPRN_SVR); | ||
66 | 67 | ||
67 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ | 68 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ |
68 | vcpu->vcpu_id = 0; | 69 | vcpu->vcpu_id = 0; |
@@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, | |||
96 | return 0; | 97 | return 0; |
97 | } | 98 | } |
98 | 99 | ||
100 | void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||
101 | { | ||
102 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
103 | |||
104 | sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | | ||
105 | KVM_SREGS_E_PM; | ||
106 | sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; | ||
107 | |||
108 | sregs->u.e.impl.fsl.features = 0; | ||
109 | sregs->u.e.impl.fsl.svr = vcpu_e500->svr; | ||
110 | sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; | ||
111 | sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; | ||
112 | |||
113 | sregs->u.e.mas0 = vcpu_e500->mas0; | ||
114 | sregs->u.e.mas1 = vcpu_e500->mas1; | ||
115 | sregs->u.e.mas2 = vcpu_e500->mas2; | ||
116 | sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; | ||
117 | sregs->u.e.mas4 = vcpu_e500->mas4; | ||
118 | sregs->u.e.mas6 = vcpu_e500->mas6; | ||
119 | |||
120 | sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); | ||
121 | sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; | ||
122 | sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; | ||
123 | sregs->u.e.tlbcfg[2] = 0; | ||
124 | sregs->u.e.tlbcfg[3] = 0; | ||
125 | |||
126 | sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; | ||
127 | sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; | ||
128 | sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; | ||
129 | sregs->u.e.ivor_high[3] = | ||
130 | vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; | ||
131 | |||
132 | kvmppc_get_sregs_ivor(vcpu, sregs); | ||
133 | } | ||
134 | |||
135 | int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||
136 | { | ||
137 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
138 | |||
139 | if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { | ||
140 | vcpu_e500->svr = sregs->u.e.impl.fsl.svr; | ||
141 | vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; | ||
142 | vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; | ||
143 | } | ||
144 | |||
145 | if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { | ||
146 | vcpu_e500->mas0 = sregs->u.e.mas0; | ||
147 | vcpu_e500->mas1 = sregs->u.e.mas1; | ||
148 | vcpu_e500->mas2 = sregs->u.e.mas2; | ||
149 | vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; | ||
150 | vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; | ||
151 | vcpu_e500->mas4 = sregs->u.e.mas4; | ||
152 | vcpu_e500->mas6 = sregs->u.e.mas6; | ||
153 | } | ||
154 | |||
155 | if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) | ||
156 | return 0; | ||
157 | |||
158 | if (sregs->u.e.features & KVM_SREGS_E_SPE) { | ||
159 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = | ||
160 | sregs->u.e.ivor_high[0]; | ||
161 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = | ||
162 | sregs->u.e.ivor_high[1]; | ||
163 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = | ||
164 | sregs->u.e.ivor_high[2]; | ||
165 | } | ||
166 | |||
167 | if (sregs->u.e.features & KVM_SREGS_E_PM) { | ||
168 | vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = | ||
169 | sregs->u.e.ivor_high[3]; | ||
170 | } | ||
171 | |||
172 | return kvmppc_set_sregs_ivor(vcpu, sregs); | ||
173 | } | ||
174 | |||
99 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 175 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
100 | { | 176 | { |
101 | struct kvmppc_vcpu_e500 *vcpu_e500; | 177 | struct kvmppc_vcpu_e500 *vcpu_e500; |
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8e3edfbc9634..69cd665a0caf 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. | 2 | * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * Author: Yu Liu, <yu.liu@freescale.com> | 4 | * Author: Yu Liu, <yu.liu@freescale.com> |
5 | * | 5 | * |
@@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
78 | 78 | ||
79 | switch (sprn) { | 79 | switch (sprn) { |
80 | case SPRN_PID: | 80 | case SPRN_PID: |
81 | vcpu_e500->pid[0] = vcpu->arch.shadow_pid = | 81 | kvmppc_set_pid(vcpu, spr_val); |
82 | vcpu->arch.pid = spr_val; | ||
83 | break; | 82 | break; |
84 | case SPRN_PID1: | 83 | case SPRN_PID1: |
85 | vcpu_e500->pid[1] = spr_val; break; | 84 | vcpu_e500->pid[1] = spr_val; break; |
@@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
175 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; | 174 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; |
176 | case SPRN_HID1: | 175 | case SPRN_HID1: |
177 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; | 176 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; |
177 | case SPRN_SVR: | ||
178 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; | ||
178 | 179 | ||
179 | case SPRN_MMUCSR0: | 180 | case SPRN_MMUCSR0: |
180 | kvmppc_set_gpr(vcpu, rt, 0); break; | 181 | kvmppc_set_gpr(vcpu, rt, 0); break; |
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d6d6d47a75a9..b18fe353397d 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. | 2 | * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * Author: Yu Liu, yu.liu@freescale.com | 4 | * Author: Yu Liu, yu.liu@freescale.com |
5 | * | 5 | * |
@@ -24,6 +24,7 @@ | |||
24 | #include "../mm/mmu_decl.h" | 24 | #include "../mm/mmu_decl.h" |
25 | #include "e500_tlb.h" | 25 | #include "e500_tlb.h" |
26 | #include "trace.h" | 26 | #include "trace.h" |
27 | #include "timing.h" | ||
27 | 28 | ||
28 | #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) | 29 | #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) |
29 | 30 | ||
@@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | |||
506 | vcpu_e500->mas7 = 0; | 507 | vcpu_e500->mas7 = 0; |
507 | } | 508 | } |
508 | 509 | ||
510 | kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); | ||
509 | return EMULATE_DONE; | 511 | return EMULATE_DONE; |
510 | } | 512 | } |
511 | 513 | ||
@@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) | |||
571 | write_host_tlbe(vcpu_e500, stlbsel, sesel); | 573 | write_host_tlbe(vcpu_e500, stlbsel, sesel); |
572 | } | 574 | } |
573 | 575 | ||
576 | kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); | ||
574 | return EMULATE_DONE; | 577 | return EMULATE_DONE; |
575 | } | 578 | } |
576 | 579 | ||
@@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, | |||
672 | return -1; | 675 | return -1; |
673 | } | 676 | } |
674 | 677 | ||
678 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) | ||
679 | { | ||
680 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
681 | |||
682 | vcpu_e500->pid[0] = vcpu->arch.shadow_pid = | ||
683 | vcpu->arch.pid = pid; | ||
684 | } | ||
685 | |||
675 | void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) | 686 | void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) |
676 | { | 687 | { |
677 | struct tlbe *tlbe; | 688 | struct tlbe *tlbe; |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c64fd2909bb2..141dce3c6810 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
114 | } | 114 | } |
115 | } | 115 | } |
116 | 116 | ||
117 | u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) | ||
118 | { | ||
119 | u64 jd = tb - vcpu->arch.dec_jiffies; | ||
120 | return vcpu->arch.dec - jd; | ||
121 | } | ||
122 | |||
117 | /* XXX to do: | 123 | /* XXX to do: |
118 | * lhax | 124 | * lhax |
119 | * lhaux | 125 | * lhaux |
@@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
279 | 285 | ||
280 | case SPRN_DEC: | 286 | case SPRN_DEC: |
281 | { | 287 | { |
282 | u64 jd = get_tb() - vcpu->arch.dec_jiffies; | 288 | kvmppc_set_gpr(vcpu, rt, |
283 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); | 289 | kvmppc_get_dec(vcpu, get_tb())); |
284 | pr_debug("mfDEC: %x - %llx = %lx\n", | ||
285 | vcpu->arch.dec, jd, | ||
286 | kvmppc_get_gpr(vcpu, rt)); | ||
287 | break; | 290 | break; |
288 | } | 291 | } |
289 | default: | 292 | default: |
@@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
294 | } | 297 | } |
295 | break; | 298 | break; |
296 | } | 299 | } |
300 | kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); | ||
297 | break; | 301 | break; |
298 | 302 | ||
299 | case OP_31_XOP_STHX: | 303 | case OP_31_XOP_STHX: |
@@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
363 | printk("mtspr: unknown spr %x\n", sprn); | 367 | printk("mtspr: unknown spr %x\n", sprn); |
364 | break; | 368 | break; |
365 | } | 369 | } |
370 | kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); | ||
366 | break; | 371 | break; |
367 | 372 | ||
368 | case OP_31_XOP_DCBI: | 373 | case OP_31_XOP_DCBI: |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 99758460efde..616dd516ca1f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
175 | int r; | 175 | int r; |
176 | 176 | ||
177 | switch (ext) { | 177 | switch (ext) { |
178 | #ifdef CONFIG_BOOKE | ||
179 | case KVM_CAP_PPC_BOOKE_SREGS: | ||
180 | #else | ||
178 | case KVM_CAP_PPC_SEGSTATE: | 181 | case KVM_CAP_PPC_SEGSTATE: |
182 | #endif | ||
179 | case KVM_CAP_PPC_PAIRED_SINGLES: | 183 | case KVM_CAP_PPC_PAIRED_SINGLES: |
180 | case KVM_CAP_PPC_UNSET_IRQ: | 184 | case KVM_CAP_PPC_UNSET_IRQ: |
181 | case KVM_CAP_PPC_IRQ_LEVEL: | 185 | case KVM_CAP_PPC_IRQ_LEVEL: |
@@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
284 | tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); | 288 | tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); |
285 | vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; | 289 | vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; |
286 | 290 | ||
291 | #ifdef CONFIG_KVM_EXIT_TIMING | ||
292 | mutex_init(&vcpu->arch.exit_timing_lock); | ||
293 | #endif | ||
294 | |||
287 | return 0; | 295 | return 0; |
288 | } | 296 | } |
289 | 297 | ||
@@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
294 | 302 | ||
295 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 303 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
296 | { | 304 | { |
305 | #ifdef CONFIG_BOOKE | ||
306 | /* | ||
307 | * vrsave (formerly usprg0) isn't used by Linux, but may | ||
308 | * be used by the guest. | ||
309 | * | ||
310 | * On non-booke this is associated with Altivec and | ||
311 | * is handled by code in book3s.c. | ||
312 | */ | ||
313 | mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); | ||
314 | #endif | ||
297 | kvmppc_core_vcpu_load(vcpu, cpu); | 315 | kvmppc_core_vcpu_load(vcpu, cpu); |
298 | } | 316 | } |
299 | 317 | ||
300 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 318 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
301 | { | 319 | { |
302 | kvmppc_core_vcpu_put(vcpu); | 320 | kvmppc_core_vcpu_put(vcpu); |
321 | #ifdef CONFIG_BOOKE | ||
322 | vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); | ||
323 | #endif | ||
303 | } | 324 | } |
304 | 325 | ||
305 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 326 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index a021f5827a33..319177df9587 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) | |||
34 | { | 34 | { |
35 | int i; | 35 | int i; |
36 | 36 | ||
37 | /* pause guest execution to avoid concurrent updates */ | 37 | /* Take a lock to avoid concurrent updates */ |
38 | mutex_lock(&vcpu->mutex); | 38 | mutex_lock(&vcpu->arch.exit_timing_lock); |
39 | 39 | ||
40 | vcpu->arch.last_exit_type = 0xDEAD; | 40 | vcpu->arch.last_exit_type = 0xDEAD; |
41 | for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { | 41 | for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { |
@@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) | |||
49 | vcpu->arch.timing_exit.tv64 = 0; | 49 | vcpu->arch.timing_exit.tv64 = 0; |
50 | vcpu->arch.timing_last_enter.tv64 = 0; | 50 | vcpu->arch.timing_last_enter.tv64 = 0; |
51 | 51 | ||
52 | mutex_unlock(&vcpu->mutex); | 52 | mutex_unlock(&vcpu->arch.exit_timing_lock); |
53 | } | 53 | } |
54 | 54 | ||
55 | static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) | 55 | static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) |
@@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) | |||
65 | return; | 65 | return; |
66 | } | 66 | } |
67 | 67 | ||
68 | mutex_lock(&vcpu->arch.exit_timing_lock); | ||
69 | |||
68 | vcpu->arch.timing_count_type[type]++; | 70 | vcpu->arch.timing_count_type[type]++; |
69 | 71 | ||
70 | /* sum */ | 72 | /* sum */ |
@@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) | |||
93 | vcpu->arch.timing_min_duration[type] = duration; | 95 | vcpu->arch.timing_min_duration[type] = duration; |
94 | if (unlikely(duration > vcpu->arch.timing_max_duration[type])) | 96 | if (unlikely(duration > vcpu->arch.timing_max_duration[type])) |
95 | vcpu->arch.timing_max_duration[type] = duration; | 97 | vcpu->arch.timing_max_duration[type] = duration; |
98 | |||
99 | mutex_unlock(&vcpu->arch.exit_timing_lock); | ||
96 | } | 100 | } |
97 | 101 | ||
98 | void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) | 102 | void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) |
@@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) | |||
147 | { | 151 | { |
148 | struct kvm_vcpu *vcpu = m->private; | 152 | struct kvm_vcpu *vcpu = m->private; |
149 | int i; | 153 | int i; |
154 | u64 min, max, sum, sum_quad; | ||
150 | 155 | ||
151 | seq_printf(m, "%s", "type count min max sum sum_squared\n"); | 156 | seq_printf(m, "%s", "type count min max sum sum_squared\n"); |
152 | 157 | ||
158 | |||
153 | for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { | 159 | for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { |
160 | |||
161 | min = vcpu->arch.timing_min_duration[i]; | ||
162 | do_div(min, tb_ticks_per_usec); | ||
163 | max = vcpu->arch.timing_max_duration[i]; | ||
164 | do_div(max, tb_ticks_per_usec); | ||
165 | sum = vcpu->arch.timing_sum_duration[i]; | ||
166 | do_div(sum, tb_ticks_per_usec); | ||
167 | sum_quad = vcpu->arch.timing_sum_quad_duration[i]; | ||
168 | do_div(sum_quad, tb_ticks_per_usec); | ||
169 | |||
154 | seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", | 170 | seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", |
155 | kvm_exit_names[i], | 171 | kvm_exit_names[i], |
156 | vcpu->arch.timing_count_type[i], | 172 | vcpu->arch.timing_count_type[i], |
157 | vcpu->arch.timing_min_duration[i], | 173 | min, |
158 | vcpu->arch.timing_max_duration[i], | 174 | max, |
159 | vcpu->arch.timing_sum_duration[i], | 175 | sum, |
160 | vcpu->arch.timing_sum_quad_duration[i]); | 176 | sum_quad); |
177 | |||
161 | } | 178 | } |
162 | return 0; | 179 | return 0; |
163 | } | 180 | } |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f5213564326..0049211959c0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -14,6 +14,8 @@ | |||
14 | #include <asm/desc_defs.h> | 14 | #include <asm/desc_defs.h> |
15 | 15 | ||
16 | struct x86_emulate_ctxt; | 16 | struct x86_emulate_ctxt; |
17 | enum x86_intercept; | ||
18 | enum x86_intercept_stage; | ||
17 | 19 | ||
18 | struct x86_exception { | 20 | struct x86_exception { |
19 | u8 vector; | 21 | u8 vector; |
@@ -24,6 +26,24 @@ struct x86_exception { | |||
24 | }; | 26 | }; |
25 | 27 | ||
26 | /* | 28 | /* |
29 | * This struct is used to carry enough information from the instruction | ||
30 | * decoder to main KVM so that a decision can be made whether the | ||
31 | * instruction needs to be intercepted or not. | ||
32 | */ | ||
33 | struct x86_instruction_info { | ||
34 | u8 intercept; /* which intercept */ | ||
35 | u8 rep_prefix; /* rep prefix? */ | ||
36 | u8 modrm_mod; /* mod part of modrm */ | ||
37 | u8 modrm_reg; /* index of register used */ | ||
38 | u8 modrm_rm; /* rm part of modrm */ | ||
39 | u64 src_val; /* value of source operand */ | ||
40 | u8 src_bytes; /* size of source operand */ | ||
41 | u8 dst_bytes; /* size of destination operand */ | ||
42 | u8 ad_bytes; /* size of src/dst address */ | ||
43 | u64 next_rip; /* rip following the instruction */ | ||
44 | }; | ||
45 | |||
46 | /* | ||
27 | * x86_emulate_ops: | 47 | * x86_emulate_ops: |
28 | * | 48 | * |
29 | * These operations represent the instruction emulator's interface to memory. | 49 | * These operations represent the instruction emulator's interface to memory. |
@@ -62,6 +82,7 @@ struct x86_exception { | |||
62 | #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ | 82 | #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ |
63 | #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ | 83 | #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ |
64 | #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ | 84 | #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ |
85 | #define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ | ||
65 | 86 | ||
66 | struct x86_emulate_ops { | 87 | struct x86_emulate_ops { |
67 | /* | 88 | /* |
@@ -71,8 +92,9 @@ struct x86_emulate_ops { | |||
71 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 92 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
72 | * @bytes: [IN ] Number of bytes to read from memory. | 93 | * @bytes: [IN ] Number of bytes to read from memory. |
73 | */ | 94 | */ |
74 | int (*read_std)(unsigned long addr, void *val, | 95 | int (*read_std)(struct x86_emulate_ctxt *ctxt, |
75 | unsigned int bytes, struct kvm_vcpu *vcpu, | 96 | unsigned long addr, void *val, |
97 | unsigned int bytes, | ||
76 | struct x86_exception *fault); | 98 | struct x86_exception *fault); |
77 | 99 | ||
78 | /* | 100 | /* |
@@ -82,8 +104,8 @@ struct x86_emulate_ops { | |||
82 | * @val: [OUT] Value write to memory, zero-extended to 'u_long'. | 104 | * @val: [OUT] Value write to memory, zero-extended to 'u_long'. |
83 | * @bytes: [IN ] Number of bytes to write to memory. | 105 | * @bytes: [IN ] Number of bytes to write to memory. |
84 | */ | 106 | */ |
85 | int (*write_std)(unsigned long addr, void *val, | 107 | int (*write_std)(struct x86_emulate_ctxt *ctxt, |
86 | unsigned int bytes, struct kvm_vcpu *vcpu, | 108 | unsigned long addr, void *val, unsigned int bytes, |
87 | struct x86_exception *fault); | 109 | struct x86_exception *fault); |
88 | /* | 110 | /* |
89 | * fetch: Read bytes of standard (non-emulated/special) memory. | 111 | * fetch: Read bytes of standard (non-emulated/special) memory. |
@@ -92,8 +114,8 @@ struct x86_emulate_ops { | |||
92 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 114 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
93 | * @bytes: [IN ] Number of bytes to read from memory. | 115 | * @bytes: [IN ] Number of bytes to read from memory. |
94 | */ | 116 | */ |
95 | int (*fetch)(unsigned long addr, void *val, | 117 | int (*fetch)(struct x86_emulate_ctxt *ctxt, |
96 | unsigned int bytes, struct kvm_vcpu *vcpu, | 118 | unsigned long addr, void *val, unsigned int bytes, |
97 | struct x86_exception *fault); | 119 | struct x86_exception *fault); |
98 | 120 | ||
99 | /* | 121 | /* |
@@ -102,11 +124,9 @@ struct x86_emulate_ops { | |||
102 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 124 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
103 | * @bytes: [IN ] Number of bytes to read from memory. | 125 | * @bytes: [IN ] Number of bytes to read from memory. |
104 | */ | 126 | */ |
105 | int (*read_emulated)(unsigned long addr, | 127 | int (*read_emulated)(struct x86_emulate_ctxt *ctxt, |
106 | void *val, | 128 | unsigned long addr, void *val, unsigned int bytes, |
107 | unsigned int bytes, | 129 | struct x86_exception *fault); |
108 | struct x86_exception *fault, | ||
109 | struct kvm_vcpu *vcpu); | ||
110 | 130 | ||
111 | /* | 131 | /* |
112 | * write_emulated: Write bytes to emulated/special memory area. | 132 | * write_emulated: Write bytes to emulated/special memory area. |
@@ -115,11 +135,10 @@ struct x86_emulate_ops { | |||
115 | * required). | 135 | * required). |
116 | * @bytes: [IN ] Number of bytes to write to memory. | 136 | * @bytes: [IN ] Number of bytes to write to memory. |
117 | */ | 137 | */ |
118 | int (*write_emulated)(unsigned long addr, | 138 | int (*write_emulated)(struct x86_emulate_ctxt *ctxt, |
119 | const void *val, | 139 | unsigned long addr, const void *val, |
120 | unsigned int bytes, | 140 | unsigned int bytes, |
121 | struct x86_exception *fault, | 141 | struct x86_exception *fault); |
122 | struct kvm_vcpu *vcpu); | ||
123 | 142 | ||
124 | /* | 143 | /* |
125 | * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an | 144 | * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an |
@@ -129,40 +148,54 @@ struct x86_emulate_ops { | |||
129 | * @new: [IN ] Value to write to @addr. | 148 | * @new: [IN ] Value to write to @addr. |
130 | * @bytes: [IN ] Number of bytes to access using CMPXCHG. | 149 | * @bytes: [IN ] Number of bytes to access using CMPXCHG. |
131 | */ | 150 | */ |
132 | int (*cmpxchg_emulated)(unsigned long addr, | 151 | int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, |
152 | unsigned long addr, | ||
133 | const void *old, | 153 | const void *old, |
134 | const void *new, | 154 | const void *new, |
135 | unsigned int bytes, | 155 | unsigned int bytes, |
136 | struct x86_exception *fault, | 156 | struct x86_exception *fault); |
137 | struct kvm_vcpu *vcpu); | 157 | void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); |
138 | 158 | ||
139 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | 159 | int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, |
140 | unsigned int count, struct kvm_vcpu *vcpu); | 160 | int size, unsigned short port, void *val, |
141 | 161 | unsigned int count); | |
142 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | 162 | |
143 | unsigned int count, struct kvm_vcpu *vcpu); | 163 | int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, |
144 | 164 | int size, unsigned short port, const void *val, | |
145 | bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, | 165 | unsigned int count); |
146 | int seg, struct kvm_vcpu *vcpu); | 166 | |
147 | void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, | 167 | bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, |
148 | int seg, struct kvm_vcpu *vcpu); | 168 | struct desc_struct *desc, u32 *base3, int seg); |
149 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 169 | void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, |
150 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 170 | struct desc_struct *desc, u32 base3, int seg); |
151 | unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); | 171 | unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, |
152 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 172 | int seg); |
153 | void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 173 | void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
154 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | 174 | void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
155 | int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | 175 | void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
156 | int (*cpl)(struct kvm_vcpu *vcpu); | 176 | void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
157 | int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); | 177 | ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); |
158 | int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); | 178 | int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); |
159 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 179 | int (*cpl)(struct x86_emulate_ctxt *ctxt); |
160 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 180 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); |
181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | ||
182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); | ||
183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); | ||
184 | void (*halt)(struct x86_emulate_ctxt *ctxt); | ||
185 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); | ||
186 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); | ||
187 | void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ | ||
188 | void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ | ||
189 | int (*intercept)(struct x86_emulate_ctxt *ctxt, | ||
190 | struct x86_instruction_info *info, | ||
191 | enum x86_intercept_stage stage); | ||
161 | }; | 192 | }; |
162 | 193 | ||
194 | typedef u32 __attribute__((vector_size(16))) sse128_t; | ||
195 | |||
163 | /* Type, address-of, and value of an instruction's operand. */ | 196 | /* Type, address-of, and value of an instruction's operand. */ |
164 | struct operand { | 197 | struct operand { |
165 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; | 198 | enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; |
166 | unsigned int bytes; | 199 | unsigned int bytes; |
167 | union { | 200 | union { |
168 | unsigned long orig_val; | 201 | unsigned long orig_val; |
@@ -174,11 +207,13 @@ struct operand { | |||
174 | ulong ea; | 207 | ulong ea; |
175 | unsigned seg; | 208 | unsigned seg; |
176 | } mem; | 209 | } mem; |
210 | unsigned xmm; | ||
177 | } addr; | 211 | } addr; |
178 | union { | 212 | union { |
179 | unsigned long val; | 213 | unsigned long val; |
180 | u64 val64; | 214 | u64 val64; |
181 | char valptr[sizeof(unsigned long) + 2]; | 215 | char valptr[sizeof(unsigned long) + 2]; |
216 | sse128_t vec_val; | ||
182 | }; | 217 | }; |
183 | }; | 218 | }; |
184 | 219 | ||
@@ -197,6 +232,7 @@ struct read_cache { | |||
197 | struct decode_cache { | 232 | struct decode_cache { |
198 | u8 twobyte; | 233 | u8 twobyte; |
199 | u8 b; | 234 | u8 b; |
235 | u8 intercept; | ||
200 | u8 lock_prefix; | 236 | u8 lock_prefix; |
201 | u8 rep_prefix; | 237 | u8 rep_prefix; |
202 | u8 op_bytes; | 238 | u8 op_bytes; |
@@ -209,6 +245,7 @@ struct decode_cache { | |||
209 | u8 seg_override; | 245 | u8 seg_override; |
210 | unsigned int d; | 246 | unsigned int d; |
211 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 247 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
248 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | ||
212 | unsigned long regs[NR_VCPU_REGS]; | 249 | unsigned long regs[NR_VCPU_REGS]; |
213 | unsigned long eip; | 250 | unsigned long eip; |
214 | /* modrm */ | 251 | /* modrm */ |
@@ -227,17 +264,15 @@ struct x86_emulate_ctxt { | |||
227 | struct x86_emulate_ops *ops; | 264 | struct x86_emulate_ops *ops; |
228 | 265 | ||
229 | /* Register state before/after emulation. */ | 266 | /* Register state before/after emulation. */ |
230 | struct kvm_vcpu *vcpu; | ||
231 | |||
232 | unsigned long eflags; | 267 | unsigned long eflags; |
233 | unsigned long eip; /* eip before instruction emulation */ | 268 | unsigned long eip; /* eip before instruction emulation */ |
234 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 269 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
235 | int mode; | 270 | int mode; |
236 | u32 cs_base; | ||
237 | 271 | ||
238 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 272 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
239 | int interruptibility; | 273 | int interruptibility; |
240 | 274 | ||
275 | bool guest_mode; /* guest running a nested guest */ | ||
241 | bool perm_ok; /* do not check permissions if true */ | 276 | bool perm_ok; /* do not check permissions if true */ |
242 | bool only_vendor_specific_insn; | 277 | bool only_vendor_specific_insn; |
243 | 278 | ||
@@ -249,8 +284,8 @@ struct x86_emulate_ctxt { | |||
249 | }; | 284 | }; |
250 | 285 | ||
251 | /* Repeat String Operation Prefix */ | 286 | /* Repeat String Operation Prefix */ |
252 | #define REPE_PREFIX 1 | 287 | #define REPE_PREFIX 0xf3 |
253 | #define REPNE_PREFIX 2 | 288 | #define REPNE_PREFIX 0xf2 |
254 | 289 | ||
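Note: REPE_PREFIX and REPNE_PREFIX now hold the raw x86 prefix bytes (0xf3 and 0xf2) instead of arbitrary enumerators, so the decoder can record the fetched prefix byte directly in decode_cache.rep_prefix (a u8, see above). A minimal sketch of the idea; the surrounding prefix-scan loop and the prefix_byte variable are assumptions, not part of this hunk:

	/* sketch: legacy-prefix scan during instruction decode */
	switch (prefix_byte) {
	case 0xf2:				/* REPNE/REPNZ */
		c->rep_prefix = REPNE_PREFIX;	/* stores 0xf2 itself */
		break;
	case 0xf3:				/* REP/REPE/REPZ */
		c->rep_prefix = REPE_PREFIX;	/* stores 0xf3 itself */
		break;
	}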
255 | /* Execution mode, passed to the emulator. */ | 290 | /* Execution mode, passed to the emulator. */ |
256 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | 291 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ |
@@ -259,6 +294,69 @@ struct x86_emulate_ctxt { | |||
259 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ | 294 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ |
260 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | 295 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ |
261 | 296 | ||
297 | /* any protected mode */ | ||
298 | #define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ | ||
299 | X86EMUL_MODE_PROT64) | ||
300 | |||
301 | enum x86_intercept_stage { | ||
302 | X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ | ||
303 | X86_ICPT_PRE_EXCEPT, | ||
304 | X86_ICPT_POST_EXCEPT, | ||
305 | X86_ICPT_POST_MEMACCESS, | ||
306 | }; | ||
307 | |||
308 | enum x86_intercept { | ||
309 | x86_intercept_none, | ||
310 | x86_intercept_cr_read, | ||
311 | x86_intercept_cr_write, | ||
312 | x86_intercept_clts, | ||
313 | x86_intercept_lmsw, | ||
314 | x86_intercept_smsw, | ||
315 | x86_intercept_dr_read, | ||
316 | x86_intercept_dr_write, | ||
317 | x86_intercept_lidt, | ||
318 | x86_intercept_sidt, | ||
319 | x86_intercept_lgdt, | ||
320 | x86_intercept_sgdt, | ||
321 | x86_intercept_lldt, | ||
322 | x86_intercept_sldt, | ||
323 | x86_intercept_ltr, | ||
324 | x86_intercept_str, | ||
325 | x86_intercept_rdtsc, | ||
326 | x86_intercept_rdpmc, | ||
327 | x86_intercept_pushf, | ||
328 | x86_intercept_popf, | ||
329 | x86_intercept_cpuid, | ||
330 | x86_intercept_rsm, | ||
331 | x86_intercept_iret, | ||
332 | x86_intercept_intn, | ||
333 | x86_intercept_invd, | ||
334 | x86_intercept_pause, | ||
335 | x86_intercept_hlt, | ||
336 | x86_intercept_invlpg, | ||
337 | x86_intercept_invlpga, | ||
338 | x86_intercept_vmrun, | ||
339 | x86_intercept_vmload, | ||
340 | x86_intercept_vmsave, | ||
341 | x86_intercept_vmmcall, | ||
342 | x86_intercept_stgi, | ||
343 | x86_intercept_clgi, | ||
344 | x86_intercept_skinit, | ||
345 | x86_intercept_rdtscp, | ||
346 | x86_intercept_icebp, | ||
347 | x86_intercept_wbinvd, | ||
348 | x86_intercept_monitor, | ||
349 | x86_intercept_mwait, | ||
350 | x86_intercept_rdmsr, | ||
351 | x86_intercept_wrmsr, | ||
352 | x86_intercept_in, | ||
353 | x86_intercept_ins, | ||
354 | x86_intercept_out, | ||
355 | x86_intercept_outs, | ||
356 | |||
357 | nr_x86_intercepts | ||
358 | }; | ||
359 | |||
262 | /* Host execution mode. */ | 360 | /* Host execution mode. */ |
263 | #if defined(CONFIG_X86_32) | 361 | #if defined(CONFIG_X86_32) |
264 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 | 362 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 |
@@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); | |||
270 | #define EMULATION_FAILED -1 | 368 | #define EMULATION_FAILED -1 |
271 | #define EMULATION_OK 0 | 369 | #define EMULATION_OK 0 |
272 | #define EMULATION_RESTART 1 | 370 | #define EMULATION_RESTART 1 |
371 | #define EMULATION_INTERCEPTED 2 | ||
273 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); | 372 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); |
274 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 373 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
275 | u16 tss_selector, int reason, | 374 | u16 tss_selector, int reason, |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af0991fdf0..d2ac8e2ee897 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -30,14 +30,30 @@ | |||
30 | #define KVM_MEMORY_SLOTS 32 | 30 | #define KVM_MEMORY_SLOTS 32 |
31 | /* memory slots that does not exposed to userspace */ | 31 | /* memory slots that does not exposed to userspace */ |
32 | #define KVM_PRIVATE_MEM_SLOTS 4 | 32 | #define KVM_PRIVATE_MEM_SLOTS 4 |
33 | #define KVM_MMIO_SIZE 16 | ||
33 | 34 | ||
34 | #define KVM_PIO_PAGE_OFFSET 1 | 35 | #define KVM_PIO_PAGE_OFFSET 1 |
35 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | 36 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 |
36 | 37 | ||
38 | #define CR0_RESERVED_BITS \ | ||
39 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | ||
40 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | ||
41 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | ||
42 | |||
37 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) | 43 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) |
38 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) | 44 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) |
39 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 45 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
40 | 0xFFFFFF0000000000ULL) | 46 | 0xFFFFFF0000000000ULL) |
47 | #define CR4_RESERVED_BITS \ | ||
48 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
49 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
50 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
51 | | X86_CR4_OSXSAVE \ | ||
52 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
53 | |||
54 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
55 | |||
56 | |||
41 | 57 | ||
42 | #define INVALID_PAGE (~(hpa_t)0) | 58 | #define INVALID_PAGE (~(hpa_t)0) |
43 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | 59 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) |
@@ -118,6 +134,9 @@ enum kvm_reg { | |||
118 | enum kvm_reg_ex { | 134 | enum kvm_reg_ex { |
119 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, | 135 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, |
120 | VCPU_EXREG_CR3, | 136 | VCPU_EXREG_CR3, |
137 | VCPU_EXREG_RFLAGS, | ||
138 | VCPU_EXREG_CPL, | ||
139 | VCPU_EXREG_SEGMENTS, | ||
121 | }; | 140 | }; |
122 | 141 | ||
123 | enum { | 142 | enum { |
@@ -256,7 +275,7 @@ struct kvm_mmu { | |||
256 | struct kvm_mmu_page *sp); | 275 | struct kvm_mmu_page *sp); |
257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 276 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
258 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 277 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
259 | u64 *spte, const void *pte, unsigned long mmu_seq); | 278 | u64 *spte, const void *pte); |
260 | hpa_t root_hpa; | 279 | hpa_t root_hpa; |
261 | int root_level; | 280 | int root_level; |
262 | int shadow_root_level; | 281 | int shadow_root_level; |
@@ -340,7 +359,6 @@ struct kvm_vcpu_arch { | |||
340 | struct fpu guest_fpu; | 359 | struct fpu guest_fpu; |
341 | u64 xcr0; | 360 | u64 xcr0; |
342 | 361 | ||
343 | gva_t mmio_fault_cr2; | ||
344 | struct kvm_pio_request pio; | 362 | struct kvm_pio_request pio; |
345 | void *pio_data; | 363 | void *pio_data; |
346 | 364 | ||
@@ -367,18 +385,22 @@ struct kvm_vcpu_arch { | |||
367 | /* emulate context */ | 385 | /* emulate context */ |
368 | 386 | ||
369 | struct x86_emulate_ctxt emulate_ctxt; | 387 | struct x86_emulate_ctxt emulate_ctxt; |
388 | bool emulate_regs_need_sync_to_vcpu; | ||
389 | bool emulate_regs_need_sync_from_vcpu; | ||
370 | 390 | ||
371 | gpa_t time; | 391 | gpa_t time; |
372 | struct pvclock_vcpu_time_info hv_clock; | 392 | struct pvclock_vcpu_time_info hv_clock; |
373 | unsigned int hw_tsc_khz; | 393 | unsigned int hw_tsc_khz; |
374 | unsigned int time_offset; | 394 | unsigned int time_offset; |
375 | struct page *time_page; | 395 | struct page *time_page; |
376 | u64 last_host_tsc; | ||
377 | u64 last_guest_tsc; | 396 | u64 last_guest_tsc; |
378 | u64 last_kernel_ns; | 397 | u64 last_kernel_ns; |
379 | u64 last_tsc_nsec; | 398 | u64 last_tsc_nsec; |
380 | u64 last_tsc_write; | 399 | u64 last_tsc_write; |
400 | u32 virtual_tsc_khz; | ||
381 | bool tsc_catchup; | 401 | bool tsc_catchup; |
402 | u32 tsc_catchup_mult; | ||
403 | s8 tsc_catchup_shift; | ||
382 | 404 | ||
383 | bool nmi_pending; | 405 | bool nmi_pending; |
384 | bool nmi_injected; | 406 | bool nmi_injected; |
@@ -448,9 +470,6 @@ struct kvm_arch { | |||
448 | u64 last_tsc_nsec; | 470 | u64 last_tsc_nsec; |
449 | u64 last_tsc_offset; | 471 | u64 last_tsc_offset; |
450 | u64 last_tsc_write; | 472 | u64 last_tsc_write; |
451 | u32 virtual_tsc_khz; | ||
452 | u32 virtual_tsc_mult; | ||
453 | s8 virtual_tsc_shift; | ||
454 | 473 | ||
455 | struct kvm_xen_hvm_config xen_hvm_config; | 474 | struct kvm_xen_hvm_config xen_hvm_config; |
456 | 475 | ||
@@ -502,6 +521,8 @@ struct kvm_vcpu_stat { | |||
502 | u32 nmi_injections; | 521 | u32 nmi_injections; |
503 | }; | 522 | }; |
504 | 523 | ||
524 | struct x86_instruction_info; | ||
525 | |||
505 | struct kvm_x86_ops { | 526 | struct kvm_x86_ops { |
506 | int (*cpu_has_kvm_support)(void); /* __init */ | 527 | int (*cpu_has_kvm_support)(void); /* __init */ |
507 | int (*disabled_by_bios)(void); /* __init */ | 528 | int (*disabled_by_bios)(void); /* __init */ |
@@ -586,9 +607,17 @@ struct kvm_x86_ops { | |||
586 | 607 | ||
587 | bool (*has_wbinvd_exit)(void); | 608 | bool (*has_wbinvd_exit)(void); |
588 | 609 | ||
610 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); | ||
589 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 611 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
590 | 612 | ||
613 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | ||
614 | |||
591 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); | 615 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); |
616 | |||
617 | int (*check_intercept)(struct kvm_vcpu *vcpu, | ||
618 | struct x86_instruction_info *info, | ||
619 | enum x86_intercept_stage stage); | ||
620 | |||
592 | const struct trace_print_flags *exit_reasons_str; | 621 | const struct trace_print_flags *exit_reasons_str; |
593 | }; | 622 | }; |
594 | 623 | ||
@@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
627 | 656 | ||
628 | extern bool tdp_enabled; | 657 | extern bool tdp_enabled; |
629 | 658 | ||
659 | /* control of guest tsc rate supported? */ | ||
660 | extern bool kvm_has_tsc_control; | ||
661 | /* minimum supported tsc_khz for guests */ | ||
662 | extern u32 kvm_min_guest_tsc_khz; | ||
663 | /* maximum supported tsc_khz for guests */ | ||
664 | extern u32 kvm_max_guest_tsc_khz; | ||
665 | |||
630 | enum emulation_result { | 666 | enum emulation_result { |
631 | EMULATE_DONE, /* no further processing */ | 667 | EMULATE_DONE, /* no further processing */ |
632 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ | 668 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ |
@@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, | |||
645 | return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); | 681 | return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); |
646 | } | 682 | } |
647 | 683 | ||
648 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | ||
649 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | ||
650 | |||
651 | void kvm_enable_efer_bits(u64); | 684 | void kvm_enable_efer_bits(u64); |
652 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 685 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
653 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 686 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
@@ -657,8 +690,6 @@ struct x86_emulate_ctxt; | |||
657 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); | 690 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
658 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 691 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
659 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 692 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
660 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | ||
661 | int emulate_clts(struct kvm_vcpu *vcpu); | ||
662 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | 693 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
663 | 694 | ||
664 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 695 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
@@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, | |||
721 | 752 | ||
722 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 753 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
723 | 754 | ||
724 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu); | ||
725 | |||
726 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, | 755 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, |
727 | void *insn, int insn_len); | 756 | void *insn, int insn_len); |
728 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | 757 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cce71413d0b..485b4f1f079b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -118,6 +118,7 @@ | |||
118 | complete list. */ | 118 | complete list. */ |
119 | 119 | ||
120 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | 120 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b |
121 | #define MSR_AMD64_TSC_RATIO 0xc0000104 | ||
121 | #define MSR_AMD64_NB_CFG 0xc001001f | 122 | #define MSR_AMD64_NB_CFG 0xc001001f |
122 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | 123 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 |
123 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | 124 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b819a8b..d6e2477feb18 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -73,9 +73,14 @@ | |||
73 | #define MemAbs (1<<11) /* Memory operand is absolute displacement */ | 73 | #define MemAbs (1<<11) /* Memory operand is absolute displacement */ |
74 | #define String (1<<12) /* String instruction (rep capable) */ | 74 | #define String (1<<12) /* String instruction (rep capable) */ |
75 | #define Stack (1<<13) /* Stack instruction (push/pop) */ | 75 | #define Stack (1<<13) /* Stack instruction (push/pop) */ |
76 | #define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ | ||
76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 77 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 78 | #define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ |
79 | #define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ | ||
80 | #define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ | ||
81 | #define Sse (1<<17) /* SSE Vector instruction */ | ||
78 | /* Misc flags */ | 82 | /* Misc flags */ |
83 | #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ | ||
79 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | 84 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ |
80 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ | 85 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ |
81 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ | 86 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ |
@@ -102,11 +107,14 @@ | |||
102 | 107 | ||
103 | struct opcode { | 108 | struct opcode { |
104 | u32 flags; | 109 | u32 flags; |
110 | u8 intercept; | ||
105 | union { | 111 | union { |
106 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 112 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
107 | struct opcode *group; | 113 | struct opcode *group; |
108 | struct group_dual *gdual; | 114 | struct group_dual *gdual; |
115 | struct gprefix *gprefix; | ||
109 | } u; | 116 | } u; |
117 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | ||
110 | }; | 118 | }; |
111 | 119 | ||
112 | struct group_dual { | 120 | struct group_dual { |
@@ -114,6 +122,13 @@ struct group_dual { | |||
114 | struct opcode mod3[8]; | 122 | struct opcode mod3[8]; |
115 | }; | 123 | }; |
116 | 124 | ||
125 | struct gprefix { | ||
126 | struct opcode pfx_no; | ||
127 | struct opcode pfx_66; | ||
128 | struct opcode pfx_f2; | ||
129 | struct opcode pfx_f3; | ||
130 | }; | ||
131 | |||
117 | /* EFLAGS bit definitions. */ | 132 | /* EFLAGS bit definitions. */ |
118 | #define EFLG_ID (1<<21) | 133 | #define EFLG_ID (1<<21) |
119 | #define EFLG_VIP (1<<20) | 134 | #define EFLG_VIP (1<<20) |
@@ -248,42 +263,42 @@ struct group_dual { | |||
248 | "w", "r", _LO32, "r", "", "r") | 263 | "w", "r", _LO32, "r", "", "r") |
249 | 264 | ||
250 | /* Instruction has three operands and one operand is stored in ECX register */ | 265 | /* Instruction has three operands and one operand is stored in ECX register */ |
251 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ | 266 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ |
252 | do { \ | 267 | do { \ |
253 | unsigned long _tmp; \ | 268 | unsigned long _tmp; \ |
254 | _type _clv = (_cl).val; \ | 269 | _type _clv = (_cl).val; \ |
255 | _type _srcv = (_src).val; \ | 270 | _type _srcv = (_src).val; \ |
256 | _type _dstv = (_dst).val; \ | 271 | _type _dstv = (_dst).val; \ |
257 | \ | 272 | \ |
258 | __asm__ __volatile__ ( \ | 273 | __asm__ __volatile__ ( \ |
259 | _PRE_EFLAGS("0", "5", "2") \ | 274 | _PRE_EFLAGS("0", "5", "2") \ |
260 | _op _suffix " %4,%1 \n" \ | 275 | _op _suffix " %4,%1 \n" \ |
261 | _POST_EFLAGS("0", "5", "2") \ | 276 | _POST_EFLAGS("0", "5", "2") \ |
262 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ | 277 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ |
263 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ | 278 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ |
264 | ); \ | 279 | ); \ |
265 | \ | 280 | \ |
266 | (_cl).val = (unsigned long) _clv; \ | 281 | (_cl).val = (unsigned long) _clv; \ |
267 | (_src).val = (unsigned long) _srcv; \ | 282 | (_src).val = (unsigned long) _srcv; \ |
268 | (_dst).val = (unsigned long) _dstv; \ | 283 | (_dst).val = (unsigned long) _dstv; \ |
269 | } while (0) | 284 | } while (0) |
270 | 285 | ||
271 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ | 286 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ |
272 | do { \ | 287 | do { \ |
273 | switch ((_dst).bytes) { \ | 288 | switch ((_dst).bytes) { \ |
274 | case 2: \ | 289 | case 2: \ |
275 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 290 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
276 | "w", unsigned short); \ | 291 | "w", unsigned short); \ |
277 | break; \ | 292 | break; \ |
278 | case 4: \ | 293 | case 4: \ |
279 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 294 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
280 | "l", unsigned int); \ | 295 | "l", unsigned int); \ |
281 | break; \ | 296 | break; \ |
282 | case 8: \ | 297 | case 8: \ |
283 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 298 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
284 | "q", unsigned long)); \ | 299 | "q", unsigned long)); \ |
285 | break; \ | 300 | break; \ |
286 | } \ | 301 | } \ |
287 | } while (0) | 302 | } while (0) |
288 | 303 | ||
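The emulate_2op_cl() wrapper above only gains consistent indentation; its interface is unchanged. For orientation, a hypothetical call site for a double-shift instruction, whose count operand lives in CL, could look like this (the operand names c->src2/c->src/c->dst are assumed, not shown in this hunk):

	/* hypothetical use: SHLD r/m, r, CL */
	emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);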
289 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ | 304 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ |
@@ -346,13 +361,25 @@ struct group_dual { | |||
346 | } while (0) | 361 | } while (0) |
347 | 362 | ||
348 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ | 363 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ |
349 | #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ | 364 | #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ |
350 | do { \ | 365 | do { \ |
351 | switch((_src).bytes) { \ | 366 | switch((_src).bytes) { \ |
352 | case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ | 367 | case 1: \ |
353 | case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ | 368 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ |
354 | case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ | 369 | _eflags, "b"); \ |
355 | case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ | 370 | break; \ |
371 | case 2: \ | ||
372 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
373 | _eflags, "w"); \ | ||
374 | break; \ | ||
375 | case 4: \ | ||
376 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
377 | _eflags, "l"); \ | ||
378 | break; \ | ||
379 | case 8: \ | ||
380 | ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
381 | _eflags, "q")); \ | ||
382 | break; \ | ||
356 | } \ | 383 | } \ |
357 | } while (0) | 384 | } while (0) |
358 | 385 | ||
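Likewise, emulate_1op_rax_rdx() is merely re-wrapped to keep lines short. A hypothetical invocation for MUL, which implicitly widens into RDX:RAX, might read as follows (the register-array indexing is an assumption for illustration):

	/* hypothetical use: MUL r/m -- result to RDX:RAX, flags updated */
	emulate_1op_rax_rdx("mul", c->src,
			    c->regs[VCPU_REGS_RAX],
			    c->regs[VCPU_REGS_RDX],
			    ctxt->eflags);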
@@ -388,13 +415,33 @@ struct group_dual { | |||
388 | (_type)_x; \ | 415 | (_type)_x; \ |
389 | }) | 416 | }) |
390 | 417 | ||
391 | #define insn_fetch_arr(_arr, _size, _eip) \ | 418 | #define insn_fetch_arr(_arr, _size, _eip) \ |
392 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ | 419 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ |
393 | if (rc != X86EMUL_CONTINUE) \ | 420 | if (rc != X86EMUL_CONTINUE) \ |
394 | goto done; \ | 421 | goto done; \ |
395 | (_eip) += (_size); \ | 422 | (_eip) += (_size); \ |
396 | }) | 423 | }) |
397 | 424 | ||
425 | static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, | ||
426 | enum x86_intercept intercept, | ||
427 | enum x86_intercept_stage stage) | ||
428 | { | ||
429 | struct x86_instruction_info info = { | ||
430 | .intercept = intercept, | ||
431 | .rep_prefix = ctxt->decode.rep_prefix, | ||
432 | .modrm_mod = ctxt->decode.modrm_mod, | ||
433 | .modrm_reg = ctxt->decode.modrm_reg, | ||
434 | .modrm_rm = ctxt->decode.modrm_rm, | ||
435 | .src_val = ctxt->decode.src.val64, | ||
436 | .src_bytes = ctxt->decode.src.bytes, | ||
437 | .dst_bytes = ctxt->decode.dst.bytes, | ||
438 | .ad_bytes = ctxt->decode.ad_bytes, | ||
439 | .next_rip = ctxt->eip, | ||
440 | }; | ||
441 | |||
442 | return ctxt->ops->intercept(ctxt, &info, stage); | ||
443 | } | ||
444 | |||
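emulator_check_intercept() packs the decoded instruction into an x86_instruction_info and forwards it through the ->intercept callback, which ultimately reaches the new kvm_x86_ops->check_intercept hook added further down in kvm_host.h. A hedged sketch of how the emulator is expected to consult it around an instruction, assuming guest_mode and the per-opcode intercept id have been set during decode:

	/* sketch: let a nested hypervisor intercept before exception checks;
	 * analogous calls use X86_ICPT_POST_EXCEPT and X86_ICPT_POST_MEMACCESS
	 * later in the execution path */
	if (ctxt->guest_mode && c->intercept) {
		rc = emulator_check_intercept(ctxt, c->intercept,
					      X86_ICPT_PRE_EXCEPT);
		if (rc != X86EMUL_CONTINUE)
			goto done;	/* instruction was intercepted */
	}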
398 | static inline unsigned long ad_mask(struct decode_cache *c) | 445 | static inline unsigned long ad_mask(struct decode_cache *c) |
399 | { | 446 | { |
400 | return (1UL << (c->ad_bytes << 3)) - 1; | 447 | return (1UL << (c->ad_bytes << 3)) - 1; |
@@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) | |||
430 | register_address_increment(c, &c->eip, rel); | 477 | register_address_increment(c, &c->eip, rel); |
431 | } | 478 | } |
432 | 479 | ||
480 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
481 | { | ||
482 | u32 limit = get_desc_limit(desc); | ||
483 | |||
484 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
485 | } | ||
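desc_limit_scaled() is moved up here so __linearize() can use it. It applies the descriptor granularity bit: with G=1 the 20-bit limit counts 4 KiB pages, so the byte limit becomes (limit << 12) | 0xfff. A small standalone illustration of the arithmetic (plain C, outside the kernel):

	#include <stdint.h>
	#include <stdio.h>

	/* same scaling rule as desc_limit_scaled(): G=1 means 4 KiB units */
	static uint32_t limit_scaled(uint32_t limit, int g)
	{
		return g ? (limit << 12) | 0xfff : limit;
	}

	int main(void)
	{
		/* flat 4 GiB segment: limit field 0xfffff with G=1 */
		printf("0x%x\n", limit_scaled(0xfffff, 1));	/* 0xffffffff */
		/* 64 KiB byte-granular segment */
		printf("0x%x\n", limit_scaled(0xffff, 0));	/* 0xffff */
		return 0;
	}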
486 | |||
433 | static void set_seg_override(struct decode_cache *c, int seg) | 487 | static void set_seg_override(struct decode_cache *c, int seg) |
434 | { | 488 | { |
435 | c->has_seg_override = true; | 489 | c->has_seg_override = true; |
@@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, | |||
442 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | 496 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) |
443 | return 0; | 497 | return 0; |
444 | 498 | ||
445 | return ops->get_cached_segment_base(seg, ctxt->vcpu); | 499 | return ops->get_cached_segment_base(ctxt, seg); |
446 | } | 500 | } |
447 | 501 | ||
448 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, | 502 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, |
449 | struct x86_emulate_ops *ops, | ||
450 | struct decode_cache *c) | 503 | struct decode_cache *c) |
451 | { | 504 | { |
452 | if (!c->has_seg_override) | 505 | if (!c->has_seg_override) |
@@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, | |||
455 | return c->seg_override; | 508 | return c->seg_override; |
456 | } | 509 | } |
457 | 510 | ||
458 | static ulong linear(struct x86_emulate_ctxt *ctxt, | ||
459 | struct segmented_address addr) | ||
460 | { | ||
461 | struct decode_cache *c = &ctxt->decode; | ||
462 | ulong la; | ||
463 | |||
464 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; | ||
465 | if (c->ad_bytes != 8) | ||
466 | la &= (u32)-1; | ||
467 | return la; | ||
468 | } | ||
469 | |||
470 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | 511 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, |
471 | u32 error, bool valid) | 512 | u32 error, bool valid) |
472 | { | 513 | { |
@@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | |||
476 | return X86EMUL_PROPAGATE_FAULT; | 517 | return X86EMUL_PROPAGATE_FAULT; |
477 | } | 518 | } |
478 | 519 | ||
520 | static int emulate_db(struct x86_emulate_ctxt *ctxt) | ||
521 | { | ||
522 | return emulate_exception(ctxt, DB_VECTOR, 0, false); | ||
523 | } | ||
524 | |||
479 | static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | 525 | static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) |
480 | { | 526 | { |
481 | return emulate_exception(ctxt, GP_VECTOR, err, true); | 527 | return emulate_exception(ctxt, GP_VECTOR, err, true); |
482 | } | 528 | } |
483 | 529 | ||
530 | static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) | ||
531 | { | ||
532 | return emulate_exception(ctxt, SS_VECTOR, err, true); | ||
533 | } | ||
534 | |||
484 | static int emulate_ud(struct x86_emulate_ctxt *ctxt) | 535 | static int emulate_ud(struct x86_emulate_ctxt *ctxt) |
485 | { | 536 | { |
486 | return emulate_exception(ctxt, UD_VECTOR, 0, false); | 537 | return emulate_exception(ctxt, UD_VECTOR, 0, false); |
@@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) | |||
496 | return emulate_exception(ctxt, DE_VECTOR, 0, false); | 547 | return emulate_exception(ctxt, DE_VECTOR, 0, false); |
497 | } | 548 | } |
498 | 549 | ||
550 | static int emulate_nm(struct x86_emulate_ctxt *ctxt) | ||
551 | { | ||
552 | return emulate_exception(ctxt, NM_VECTOR, 0, false); | ||
553 | } | ||
554 | |||
555 | static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) | ||
556 | { | ||
557 | u16 selector; | ||
558 | struct desc_struct desc; | ||
559 | |||
560 | ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); | ||
561 | return selector; | ||
562 | } | ||
563 | |||
564 | static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, | ||
565 | unsigned seg) | ||
566 | { | ||
567 | u16 dummy; | ||
568 | u32 base3; | ||
569 | struct desc_struct desc; | ||
570 | |||
571 | ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); | ||
572 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); | ||
573 | } | ||
574 | |||
575 | static int __linearize(struct x86_emulate_ctxt *ctxt, | ||
576 | struct segmented_address addr, | ||
577 | unsigned size, bool write, bool fetch, | ||
578 | ulong *linear) | ||
579 | { | ||
580 | struct decode_cache *c = &ctxt->decode; | ||
581 | struct desc_struct desc; | ||
582 | bool usable; | ||
583 | ulong la; | ||
584 | u32 lim; | ||
585 | u16 sel; | ||
586 | unsigned cpl, rpl; | ||
587 | |||
588 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; | ||
589 | switch (ctxt->mode) { | ||
590 | case X86EMUL_MODE_REAL: | ||
591 | break; | ||
592 | case X86EMUL_MODE_PROT64: | ||
593 | if (((signed long)la << 16) >> 16 != la) | ||
594 | return emulate_gp(ctxt, 0); | ||
595 | break; | ||
596 | default: | ||
597 | usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, | ||
598 | addr.seg); | ||
599 | if (!usable) | ||
600 | goto bad; | ||
601 | /* code segment or read-only data segment */ | ||
602 | if (((desc.type & 8) || !(desc.type & 2)) && write) | ||
603 | goto bad; | ||
604 | /* unreadable code segment */ | ||
605 | if (!fetch && (desc.type & 8) && !(desc.type & 2)) | ||
606 | goto bad; | ||
607 | lim = desc_limit_scaled(&desc); | ||
608 | if ((desc.type & 8) || !(desc.type & 4)) { | ||
609 | /* expand-up segment */ | ||
610 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | ||
611 | goto bad; | ||
612 | } else { | ||
613 | /* expand-down segment */ | ||
614 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) | ||
615 | goto bad; | ||
616 | lim = desc.d ? 0xffffffff : 0xffff; | ||
617 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | ||
618 | goto bad; | ||
619 | } | ||
620 | cpl = ctxt->ops->cpl(ctxt); | ||
621 | rpl = sel & 3; | ||
622 | cpl = max(cpl, rpl); | ||
623 | if (!(desc.type & 8)) { | ||
624 | /* data segment */ | ||
625 | if (cpl > desc.dpl) | ||
626 | goto bad; | ||
627 | } else if ((desc.type & 8) && !(desc.type & 4)) { | ||
628 | /* nonconforming code segment */ | ||
629 | if (cpl != desc.dpl) | ||
630 | goto bad; | ||
631 | } else if ((desc.type & 8) && (desc.type & 4)) { | ||
632 | /* conforming code segment */ | ||
633 | if (cpl < desc.dpl) | ||
634 | goto bad; | ||
635 | } | ||
636 | break; | ||
637 | } | ||
638 | if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) | ||
639 | la &= (u32)-1; | ||
640 | *linear = la; | ||
641 | return X86EMUL_CONTINUE; | ||
642 | bad: | ||
643 | if (addr.seg == VCPU_SREG_SS) | ||
644 | return emulate_ss(ctxt, addr.seg); | ||
645 | else | ||
646 | return emulate_gp(ctxt, addr.seg); | ||
647 | } | ||
648 | |||
649 | static int linearize(struct x86_emulate_ctxt *ctxt, | ||
650 | struct segmented_address addr, | ||
651 | unsigned size, bool write, | ||
652 | ulong *linear) | ||
653 | { | ||
654 | return __linearize(ctxt, addr, size, write, false, linear); | ||
655 | } | ||
656 | |||
657 | |||
658 | static int segmented_read_std(struct x86_emulate_ctxt *ctxt, | ||
659 | struct segmented_address addr, | ||
660 | void *data, | ||
661 | unsigned size) | ||
662 | { | ||
663 | int rc; | ||
664 | ulong linear; | ||
665 | |||
666 | rc = linearize(ctxt, addr, size, false, &linear); | ||
667 | if (rc != X86EMUL_CONTINUE) | ||
668 | return rc; | ||
669 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); | ||
670 | } | ||
671 | |||
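__linearize() centralizes what used to be open-coded around linear(): it adds the canonical-address check in long mode and, in protected mode, segment limit checks (including expand-down segments), type checks, and CPL/RPL-vs-DPL checks, raising #SS for stack-segment failures and #GP otherwise. segmented_read_std() then layers the ordinary read_std access on top of the checked linear address. A hedged sketch of a typical caller; the offset variable and buffer are illustrative only:

	/* sketch: read a 16-bit word through DS with full segmentation checks */
	struct segmented_address addr = { .ea = offset, .seg = VCPU_SREG_DS };
	u16 word;

	rc = segmented_read_std(ctxt, addr, &word, sizeof(word));
	if (rc != X86EMUL_CONTINUE)
		return rc;	/* #GP or #SS already queued by __linearize() */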
499 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 672 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
500 | struct x86_emulate_ops *ops, | 673 | struct x86_emulate_ops *ops, |
501 | unsigned long eip, u8 *dest) | 674 | unsigned long eip, u8 *dest) |
@@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
505 | int size, cur_size; | 678 | int size, cur_size; |
506 | 679 | ||
507 | if (eip == fc->end) { | 680 | if (eip == fc->end) { |
681 | unsigned long linear; | ||
682 | struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; | ||
508 | cur_size = fc->end - fc->start; | 683 | cur_size = fc->end - fc->start; |
509 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); | 684 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
510 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, | 685 | rc = __linearize(ctxt, addr, size, false, true, &linear); |
511 | size, ctxt->vcpu, &ctxt->exception); | 686 | if (rc != X86EMUL_CONTINUE) |
687 | return rc; | ||
688 | rc = ops->fetch(ctxt, linear, fc->data + cur_size, | ||
689 | size, &ctxt->exception); | ||
512 | if (rc != X86EMUL_CONTINUE) | 690 | if (rc != X86EMUL_CONTINUE) |
513 | return rc; | 691 | return rc; |
514 | fc->end += size; | 692 | fc->end += size; |
@@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, | |||
551 | } | 729 | } |
552 | 730 | ||
553 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, | 731 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, |
554 | struct x86_emulate_ops *ops, | ||
555 | struct segmented_address addr, | 732 | struct segmented_address addr, |
556 | u16 *size, unsigned long *address, int op_bytes) | 733 | u16 *size, unsigned long *address, int op_bytes) |
557 | { | 734 | { |
@@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
560 | if (op_bytes == 2) | 737 | if (op_bytes == 2) |
561 | op_bytes = 3; | 738 | op_bytes = 3; |
562 | *address = 0; | 739 | *address = 0; |
563 | rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, | 740 | rc = segmented_read_std(ctxt, addr, size, 2); |
564 | ctxt->vcpu, &ctxt->exception); | ||
565 | if (rc != X86EMUL_CONTINUE) | 741 | if (rc != X86EMUL_CONTINUE) |
566 | return rc; | 742 | return rc; |
567 | addr.ea += 2; | 743 | addr.ea += 2; |
568 | rc = ops->read_std(linear(ctxt, addr), address, op_bytes, | 744 | rc = segmented_read_std(ctxt, addr, address, op_bytes); |
569 | ctxt->vcpu, &ctxt->exception); | ||
570 | return rc; | 745 | return rc; |
571 | } | 746 | } |
572 | 747 | ||
@@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op) | |||
623 | } | 798 | } |
624 | } | 799 | } |
625 | 800 | ||
626 | static void decode_register_operand(struct operand *op, | 801 | static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) |
802 | { | ||
803 | ctxt->ops->get_fpu(ctxt); | ||
804 | switch (reg) { | ||
805 | case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; | ||
806 | case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; | ||
807 | case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; | ||
808 | case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; | ||
809 | case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; | ||
810 | case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; | ||
811 | case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; | ||
812 | case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; | ||
813 | #ifdef CONFIG_X86_64 | ||
814 | case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; | ||
815 | case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; | ||
816 | case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; | ||
817 | case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; | ||
818 | case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; | ||
819 | case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; | ||
820 | case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; | ||
821 | case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; | ||
822 | #endif | ||
823 | default: BUG(); | ||
824 | } | ||
825 | ctxt->ops->put_fpu(ctxt); | ||
826 | } | ||
827 | |||
828 | static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | ||
829 | int reg) | ||
830 | { | ||
831 | ctxt->ops->get_fpu(ctxt); | ||
832 | switch (reg) { | ||
833 | case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; | ||
834 | case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; | ||
835 | case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; | ||
836 | case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; | ||
837 | case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; | ||
838 | case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; | ||
839 | case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; | ||
840 | case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; | ||
841 | #ifdef CONFIG_X86_64 | ||
842 | case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; | ||
843 | case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; | ||
844 | case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; | ||
845 | case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; | ||
846 | case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; | ||
847 | case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; | ||
848 | case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; | ||
849 | case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; | ||
850 | #endif | ||
851 | default: BUG(); | ||
852 | } | ||
853 | ctxt->ops->put_fpu(ctxt); | ||
854 | } | ||
855 | |||
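read_sse_reg()/write_sse_reg() copy a guest XMM register to or from an sse128_t while the guest FPU state is loaded; the get_fpu()/put_fpu() bracket (which disables and re-enables preemption, per the ops comments above) keeps that state from being switched out mid-copy. A sketch of the round trip a 16-byte OP_XMM operand makes, condensed from the decode and writeback paths shown below:

	/* decode: Sse-flagged instruction, operand is %xmmN */
	op->type = OP_XMM;
	op->bytes = 16;
	op->addr.xmm = reg;
	read_sse_reg(ctxt, &op->vec_val, reg);	/* guest %xmmN -> vec_val */

	/* ... emulation manipulates op->vec_val ... */

	/* writeback: the OP_XMM case in writeback() */
	write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);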
856 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | ||
857 | struct operand *op, | ||
627 | struct decode_cache *c, | 858 | struct decode_cache *c, |
628 | int inhibit_bytereg) | 859 | int inhibit_bytereg) |
629 | { | 860 | { |
@@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op, | |||
632 | 863 | ||
633 | if (!(c->d & ModRM)) | 864 | if (!(c->d & ModRM)) |
634 | reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); | 865 | reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); |
866 | |||
867 | if (c->d & Sse) { | ||
868 | op->type = OP_XMM; | ||
869 | op->bytes = 16; | ||
870 | op->addr.xmm = reg; | ||
871 | read_sse_reg(ctxt, &op->vec_val, reg); | ||
872 | return; | ||
873 | } | ||
874 | |||
635 | op->type = OP_REG; | 875 | op->type = OP_REG; |
636 | if ((c->d & ByteOp) && !inhibit_bytereg) { | 876 | if ((c->d & ByteOp) && !inhibit_bytereg) { |
637 | op->addr.reg = decode_register(reg, c->regs, highbyte_regs); | 877 | op->addr.reg = decode_register(reg, c->regs, highbyte_regs); |
@@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
671 | op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 911 | op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
672 | op->addr.reg = decode_register(c->modrm_rm, | 912 | op->addr.reg = decode_register(c->modrm_rm, |
673 | c->regs, c->d & ByteOp); | 913 | c->regs, c->d & ByteOp); |
914 | if (c->d & Sse) { | ||
915 | op->type = OP_XMM; | ||
916 | op->bytes = 16; | ||
917 | op->addr.xmm = c->modrm_rm; | ||
918 | read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); | ||
919 | return rc; | ||
920 | } | ||
674 | fetch_register_operand(op); | 921 | fetch_register_operand(op); |
675 | return rc; | 922 | return rc; |
676 | } | 923 | } |
@@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
819 | if (mc->pos < mc->end) | 1066 | if (mc->pos < mc->end) |
820 | goto read_cached; | 1067 | goto read_cached; |
821 | 1068 | ||
822 | rc = ops->read_emulated(addr, mc->data + mc->end, n, | 1069 | rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, |
823 | &ctxt->exception, ctxt->vcpu); | 1070 | &ctxt->exception); |
824 | if (rc != X86EMUL_CONTINUE) | 1071 | if (rc != X86EMUL_CONTINUE) |
825 | return rc; | 1072 | return rc; |
826 | mc->end += n; | 1073 | mc->end += n; |
@@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
834 | return X86EMUL_CONTINUE; | 1081 | return X86EMUL_CONTINUE; |
835 | } | 1082 | } |
836 | 1083 | ||
1084 | static int segmented_read(struct x86_emulate_ctxt *ctxt, | ||
1085 | struct segmented_address addr, | ||
1086 | void *data, | ||
1087 | unsigned size) | ||
1088 | { | ||
1089 | int rc; | ||
1090 | ulong linear; | ||
1091 | |||
1092 | rc = linearize(ctxt, addr, size, false, &linear); | ||
1093 | if (rc != X86EMUL_CONTINUE) | ||
1094 | return rc; | ||
1095 | return read_emulated(ctxt, ctxt->ops, linear, data, size); | ||
1096 | } | ||
1097 | |||
1098 | static int segmented_write(struct x86_emulate_ctxt *ctxt, | ||
1099 | struct segmented_address addr, | ||
1100 | const void *data, | ||
1101 | unsigned size) | ||
1102 | { | ||
1103 | int rc; | ||
1104 | ulong linear; | ||
1105 | |||
1106 | rc = linearize(ctxt, addr, size, true, &linear); | ||
1107 | if (rc != X86EMUL_CONTINUE) | ||
1108 | return rc; | ||
1109 | return ctxt->ops->write_emulated(ctxt, linear, data, size, | ||
1110 | &ctxt->exception); | ||
1111 | } | ||
1112 | |||
1113 | static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, | ||
1114 | struct segmented_address addr, | ||
1115 | const void *orig_data, const void *data, | ||
1116 | unsigned size) | ||
1117 | { | ||
1118 | int rc; | ||
1119 | ulong linear; | ||
1120 | |||
1121 | rc = linearize(ctxt, addr, size, true, &linear); | ||
1122 | if (rc != X86EMUL_CONTINUE) | ||
1123 | return rc; | ||
1124 | return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, | ||
1125 | size, &ctxt->exception); | ||
1126 | } | ||
1127 | |||
837 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 1128 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
838 | struct x86_emulate_ops *ops, | 1129 | struct x86_emulate_ops *ops, |
839 | unsigned int size, unsigned short port, | 1130 | unsigned int size, unsigned short port, |
@@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
854 | if (n == 0) | 1145 | if (n == 0) |
855 | n = 1; | 1146 | n = 1; |
856 | rc->pos = rc->end = 0; | 1147 | rc->pos = rc->end = 0; |
857 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | 1148 | if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) |
858 | return 0; | 1149 | return 0; |
859 | rc->end = n * size; | 1150 | rc->end = n * size; |
860 | } | 1151 | } |
@@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
864 | return 1; | 1155 | return 1; |
865 | } | 1156 | } |
866 | 1157 | ||
867 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
868 | { | ||
869 | u32 limit = get_desc_limit(desc); | ||
870 | |||
871 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
872 | } | ||
873 | |||
874 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | 1158 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, |
875 | struct x86_emulate_ops *ops, | 1159 | struct x86_emulate_ops *ops, |
876 | u16 selector, struct desc_ptr *dt) | 1160 | u16 selector, struct desc_ptr *dt) |
877 | { | 1161 | { |
878 | if (selector & 1 << 2) { | 1162 | if (selector & 1 << 2) { |
879 | struct desc_struct desc; | 1163 | struct desc_struct desc; |
1164 | u16 sel; | ||
1165 | |||
880 | memset (dt, 0, sizeof *dt); | 1166 | memset (dt, 0, sizeof *dt); |
881 | if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, | 1167 | if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) |
882 | ctxt->vcpu)) | ||
883 | return; | 1168 | return; |
884 | 1169 | ||
885 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | 1170 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ |
886 | dt->address = get_desc_base(&desc); | 1171 | dt->address = get_desc_base(&desc); |
887 | } else | 1172 | } else |
888 | ops->get_gdt(dt, ctxt->vcpu); | 1173 | ops->get_gdt(ctxt, dt); |
889 | } | 1174 | } |
890 | 1175 | ||
891 | /* allowed just for 8 bytes segments */ | 1176 | /* allowed just for 8 bytes segments */ |
@@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
903 | if (dt.size < index * 8 + 7) | 1188 | if (dt.size < index * 8 + 7) |
904 | return emulate_gp(ctxt, selector & 0xfffc); | 1189 | return emulate_gp(ctxt, selector & 0xfffc); |
905 | addr = dt.address + index * 8; | 1190 | addr = dt.address + index * 8; |
906 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, | 1191 | ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); |
907 | &ctxt->exception); | ||
908 | 1192 | ||
909 | return ret; | 1193 | return ret; |
910 | } | 1194 | } |
@@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
925 | return emulate_gp(ctxt, selector & 0xfffc); | 1209 | return emulate_gp(ctxt, selector & 0xfffc); |
926 | 1210 | ||
927 | addr = dt.address + index * 8; | 1211 | addr = dt.address + index * 8; |
928 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, | 1212 | ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); |
929 | &ctxt->exception); | ||
930 | 1213 | ||
931 | return ret; | 1214 | return ret; |
932 | } | 1215 | } |
@@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
986 | 1269 | ||
987 | rpl = selector & 3; | 1270 | rpl = selector & 3; |
988 | dpl = seg_desc.dpl; | 1271 | dpl = seg_desc.dpl; |
989 | cpl = ops->cpl(ctxt->vcpu); | 1272 | cpl = ops->cpl(ctxt); |
990 | 1273 | ||
991 | switch (seg) { | 1274 | switch (seg) { |
992 | case VCPU_SREG_SS: | 1275 | case VCPU_SREG_SS: |
@@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1042 | return ret; | 1325 | return ret; |
1043 | } | 1326 | } |
1044 | load: | 1327 | load: |
1045 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | 1328 | ops->set_segment(ctxt, selector, &seg_desc, 0, seg); |
1046 | ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); | ||
1047 | return X86EMUL_CONTINUE; | 1329 | return X86EMUL_CONTINUE; |
1048 | exception: | 1330 | exception: |
1049 | emulate_exception(ctxt, err_vec, err_code, true); | 1331 | emulate_exception(ctxt, err_vec, err_code, true); |
@@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op) | |||
1069 | } | 1351 | } |
1070 | } | 1352 | } |
1071 | 1353 | ||
1072 | static inline int writeback(struct x86_emulate_ctxt *ctxt, | 1354 | static int writeback(struct x86_emulate_ctxt *ctxt) |
1073 | struct x86_emulate_ops *ops) | ||
1074 | { | 1355 | { |
1075 | int rc; | 1356 | int rc; |
1076 | struct decode_cache *c = &ctxt->decode; | 1357 | struct decode_cache *c = &ctxt->decode; |
@@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1081 | break; | 1362 | break; |
1082 | case OP_MEM: | 1363 | case OP_MEM: |
1083 | if (c->lock_prefix) | 1364 | if (c->lock_prefix) |
1084 | rc = ops->cmpxchg_emulated( | 1365 | rc = segmented_cmpxchg(ctxt, |
1085 | linear(ctxt, c->dst.addr.mem), | 1366 | c->dst.addr.mem, |
1086 | &c->dst.orig_val, | 1367 | &c->dst.orig_val, |
1087 | &c->dst.val, | 1368 | &c->dst.val, |
1088 | c->dst.bytes, | 1369 | c->dst.bytes); |
1089 | &ctxt->exception, | ||
1090 | ctxt->vcpu); | ||
1091 | else | 1370 | else |
1092 | rc = ops->write_emulated( | 1371 | rc = segmented_write(ctxt, |
1093 | linear(ctxt, c->dst.addr.mem), | 1372 | c->dst.addr.mem, |
1094 | &c->dst.val, | 1373 | &c->dst.val, |
1095 | c->dst.bytes, | 1374 | c->dst.bytes); |
1096 | &ctxt->exception, | ||
1097 | ctxt->vcpu); | ||
1098 | if (rc != X86EMUL_CONTINUE) | 1375 | if (rc != X86EMUL_CONTINUE) |
1099 | return rc; | 1376 | return rc; |
1100 | break; | 1377 | break; |
1378 | case OP_XMM: | ||
1379 | write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); | ||
1380 | break; | ||
1101 | case OP_NONE: | 1381 | case OP_NONE: |
1102 | /* no writeback */ | 1382 | /* no writeback */ |
1103 | break; | 1383 | break; |
@@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1107 | return X86EMUL_CONTINUE; | 1387 | return X86EMUL_CONTINUE; |
1108 | } | 1388 | } |
1109 | 1389 | ||
1110 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | 1390 | static int em_push(struct x86_emulate_ctxt *ctxt) |
1111 | struct x86_emulate_ops *ops) | ||
1112 | { | 1391 | { |
1113 | struct decode_cache *c = &ctxt->decode; | 1392 | struct decode_cache *c = &ctxt->decode; |
1393 | struct segmented_address addr; | ||
1114 | 1394 | ||
1115 | c->dst.type = OP_MEM; | ||
1116 | c->dst.bytes = c->op_bytes; | ||
1117 | c->dst.val = c->src.val; | ||
1118 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1395 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1119 | c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); | 1396 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1120 | c->dst.addr.mem.seg = VCPU_SREG_SS; | 1397 | addr.seg = VCPU_SREG_SS; |
1398 | |||
1399 | /* Disable writeback. */ | ||
1400 | c->dst.type = OP_NONE; | ||
1401 | return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); | ||
1121 | } | 1402 | } |
1122 | 1403 | ||
1123 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1404 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
1124 | struct x86_emulate_ops *ops, | ||
1125 | void *dest, int len) | 1405 | void *dest, int len) |
1126 | { | 1406 | { |
1127 | struct decode_cache *c = &ctxt->decode; | 1407 | struct decode_cache *c = &ctxt->decode; |
@@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1130 | 1410 | ||
1131 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); | 1411 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1132 | addr.seg = VCPU_SREG_SS; | 1412 | addr.seg = VCPU_SREG_SS; |
1133 | rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); | 1413 | rc = segmented_read(ctxt, addr, dest, len); |
1134 | if (rc != X86EMUL_CONTINUE) | 1414 | if (rc != X86EMUL_CONTINUE) |
1135 | return rc; | 1415 | return rc; |
1136 | 1416 | ||
@@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1138 | return rc; | 1418 | return rc; |
1139 | } | 1419 | } |
1140 | 1420 | ||
1421 | static int em_pop(struct x86_emulate_ctxt *ctxt) | ||
1422 | { | ||
1423 | struct decode_cache *c = &ctxt->decode; | ||
1424 | |||
1425 | return emulate_pop(ctxt, &c->dst.val, c->op_bytes); | ||
1426 | } | ||
1427 | |||
1141 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, | 1428 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, |
1142 | struct x86_emulate_ops *ops, | 1429 | struct x86_emulate_ops *ops, |
1143 | void *dest, int len) | 1430 | void *dest, int len) |
@@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1145 | int rc; | 1432 | int rc; |
1146 | unsigned long val, change_mask; | 1433 | unsigned long val, change_mask; |
1147 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1434 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1148 | int cpl = ops->cpl(ctxt->vcpu); | 1435 | int cpl = ops->cpl(ctxt); |
1149 | 1436 | ||
1150 | rc = emulate_pop(ctxt, ops, &val, len); | 1437 | rc = emulate_pop(ctxt, &val, len); |
1151 | if (rc != X86EMUL_CONTINUE) | 1438 | if (rc != X86EMUL_CONTINUE) |
1152 | return rc; | 1439 | return rc; |
1153 | 1440 | ||
@@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1179 | return rc; | 1466 | return rc; |
1180 | } | 1467 | } |
1181 | 1468 | ||
1182 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, | 1469 | static int em_popf(struct x86_emulate_ctxt *ctxt) |
1183 | struct x86_emulate_ops *ops, int seg) | ||
1184 | { | 1470 | { |
1185 | struct decode_cache *c = &ctxt->decode; | 1471 | struct decode_cache *c = &ctxt->decode; |
1186 | 1472 | ||
1187 | c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); | 1473 | c->dst.type = OP_REG; |
1474 | c->dst.addr.reg = &ctxt->eflags; | ||
1475 | c->dst.bytes = c->op_bytes; | ||
1476 | return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); | ||
1477 | } | ||
1188 | 1478 | ||
1189 | emulate_push(ctxt, ops); | 1479 | static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, |
1480 | struct x86_emulate_ops *ops, int seg) | ||
1481 | { | ||
1482 | struct decode_cache *c = &ctxt->decode; | ||
1483 | |||
1484 | c->src.val = get_segment_selector(ctxt, seg); | ||
1485 | |||
1486 | return em_push(ctxt); | ||
1190 | } | 1487 | } |
1191 | 1488 | ||
1192 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | 1489 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, |
@@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1196 | unsigned long selector; | 1493 | unsigned long selector; |
1197 | int rc; | 1494 | int rc; |
1198 | 1495 | ||
1199 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1496 | rc = emulate_pop(ctxt, &selector, c->op_bytes); |
1200 | if (rc != X86EMUL_CONTINUE) | 1497 | if (rc != X86EMUL_CONTINUE) |
1201 | return rc; | 1498 | return rc; |
1202 | 1499 | ||
@@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1204 | return rc; | 1501 | return rc; |
1205 | } | 1502 | } |
1206 | 1503 | ||
1207 | static int emulate_pusha(struct x86_emulate_ctxt *ctxt, | 1504 | static int em_pusha(struct x86_emulate_ctxt *ctxt) |
1208 | struct x86_emulate_ops *ops) | ||
1209 | { | 1505 | { |
1210 | struct decode_cache *c = &ctxt->decode; | 1506 | struct decode_cache *c = &ctxt->decode; |
1211 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | 1507 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; |
@@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, | |||
1216 | (reg == VCPU_REGS_RSP) ? | 1512 | (reg == VCPU_REGS_RSP) ? |
1217 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | 1513 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); |
1218 | 1514 | ||
1219 | emulate_push(ctxt, ops); | 1515 | rc = em_push(ctxt); |
1220 | |||
1221 | rc = writeback(ctxt, ops); | ||
1222 | if (rc != X86EMUL_CONTINUE) | 1516 | if (rc != X86EMUL_CONTINUE) |
1223 | return rc; | 1517 | return rc; |
1224 | 1518 | ||
1225 | ++reg; | 1519 | ++reg; |
1226 | } | 1520 | } |
1227 | 1521 | ||
1228 | /* Disable writeback. */ | ||
1229 | c->dst.type = OP_NONE; | ||
1230 | |||
1231 | return rc; | 1522 | return rc; |
1232 | } | 1523 | } |
1233 | 1524 | ||
1234 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | 1525 | static int em_pushf(struct x86_emulate_ctxt *ctxt) |
1235 | struct x86_emulate_ops *ops) | 1526 | { |
1527 | struct decode_cache *c = &ctxt->decode; | ||
1528 | |||
1529 | c->src.val = (unsigned long)ctxt->eflags; | ||
1530 | return em_push(ctxt); | ||
1531 | } | ||
1532 | |||
1533 | static int em_popa(struct x86_emulate_ctxt *ctxt) | ||
1236 | { | 1534 | { |
1237 | struct decode_cache *c = &ctxt->decode; | 1535 | struct decode_cache *c = &ctxt->decode; |
1238 | int rc = X86EMUL_CONTINUE; | 1536 | int rc = X86EMUL_CONTINUE; |
@@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1245 | --reg; | 1543 | --reg; |
1246 | } | 1544 | } |
1247 | 1545 | ||
1248 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1546 | rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); |
1249 | if (rc != X86EMUL_CONTINUE) | 1547 | if (rc != X86EMUL_CONTINUE) |
1250 | break; | 1548 | break; |
1251 | --reg; | 1549 | --reg; |
@@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1265 | 1563 | ||
1266 | /* TODO: Add limit checks */ | 1564 | /* TODO: Add limit checks */ |
1267 | c->src.val = ctxt->eflags; | 1565 | c->src.val = ctxt->eflags; |
1268 | emulate_push(ctxt, ops); | 1566 | rc = em_push(ctxt); |
1269 | rc = writeback(ctxt, ops); | ||
1270 | if (rc != X86EMUL_CONTINUE) | 1567 | if (rc != X86EMUL_CONTINUE) |
1271 | return rc; | 1568 | return rc; |
1272 | 1569 | ||
1273 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); | 1570 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); |
1274 | 1571 | ||
1275 | c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 1572 | c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); |
1276 | emulate_push(ctxt, ops); | 1573 | rc = em_push(ctxt); |
1277 | rc = writeback(ctxt, ops); | ||
1278 | if (rc != X86EMUL_CONTINUE) | 1574 | if (rc != X86EMUL_CONTINUE) |
1279 | return rc; | 1575 | return rc; |
1280 | 1576 | ||
1281 | c->src.val = c->eip; | 1577 | c->src.val = c->eip; |
1282 | emulate_push(ctxt, ops); | 1578 | rc = em_push(ctxt); |
1283 | rc = writeback(ctxt, ops); | ||
1284 | if (rc != X86EMUL_CONTINUE) | 1579 | if (rc != X86EMUL_CONTINUE) |
1285 | return rc; | 1580 | return rc; |
1286 | 1581 | ||
1287 | c->dst.type = OP_NONE; | 1582 | ops->get_idt(ctxt, &dt); |
1288 | |||
1289 | ops->get_idt(&dt, ctxt->vcpu); | ||
1290 | 1583 | ||
1291 | eip_addr = dt.address + (irq << 2); | 1584 | eip_addr = dt.address + (irq << 2); |
1292 | cs_addr = dt.address + (irq << 2) + 2; | 1585 | cs_addr = dt.address + (irq << 2) + 2; |
1293 | 1586 | ||
1294 | rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); | 1587 | rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); |
1295 | if (rc != X86EMUL_CONTINUE) | 1588 | if (rc != X86EMUL_CONTINUE) |
1296 | return rc; | 1589 | return rc; |
1297 | 1590 | ||
1298 | rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); | 1591 | rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); |
1299 | if (rc != X86EMUL_CONTINUE) | 1592 | if (rc != X86EMUL_CONTINUE) |
1300 | return rc; | 1593 | return rc; |
1301 | 1594 | ||
@@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1339 | 1632 | ||
1340 | /* TODO: Add stack limit check */ | 1633 | /* TODO: Add stack limit check */ |
1341 | 1634 | ||
1342 | rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); | 1635 | rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); |
1343 | 1636 | ||
1344 | if (rc != X86EMUL_CONTINUE) | 1637 | if (rc != X86EMUL_CONTINUE) |
1345 | return rc; | 1638 | return rc; |
@@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1347 | if (temp_eip & ~0xffff) | 1640 | if (temp_eip & ~0xffff) |
1348 | return emulate_gp(ctxt, 0); | 1641 | return emulate_gp(ctxt, 0); |
1349 | 1642 | ||
1350 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1643 | rc = emulate_pop(ctxt, &cs, c->op_bytes); |
1351 | 1644 | ||
1352 | if (rc != X86EMUL_CONTINUE) | 1645 | if (rc != X86EMUL_CONTINUE) |
1353 | return rc; | 1646 | return rc; |
1354 | 1647 | ||
1355 | rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); | 1648 | rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); |
1356 | 1649 | ||
1357 | if (rc != X86EMUL_CONTINUE) | 1650 | if (rc != X86EMUL_CONTINUE) |
1358 | return rc; | 1651 | return rc; |
@@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, | |||
1394 | } | 1687 | } |
1395 | } | 1688 | } |
1396 | 1689 | ||
1397 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1690 | static int em_jmp_far(struct x86_emulate_ctxt *ctxt) |
1398 | struct x86_emulate_ops *ops) | 1691 | { |
1692 | struct decode_cache *c = &ctxt->decode; | ||
1693 | int rc; | ||
1694 | unsigned short sel; | ||
1695 | |||
1696 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | ||
1697 | |||
1698 | rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); | ||
1699 | if (rc != X86EMUL_CONTINUE) | ||
1700 | return rc; | ||
1701 | |||
1702 | c->eip = 0; | ||
1703 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
1704 | return X86EMUL_CONTINUE; | ||
1705 | } | ||
1706 | |||
1707 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) | ||
1399 | { | 1708 | { |
1400 | struct decode_cache *c = &ctxt->decode; | 1709 | struct decode_cache *c = &ctxt->decode; |
1401 | 1710 | ||
1402 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1711 | return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); |
1403 | } | 1712 | } |
1404 | 1713 | ||
1405 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1714 | static int em_grp2(struct x86_emulate_ctxt *ctxt) |
1406 | { | 1715 | { |
1407 | struct decode_cache *c = &ctxt->decode; | 1716 | struct decode_cache *c = &ctxt->decode; |
1408 | switch (c->modrm_reg) { | 1717 | switch (c->modrm_reg) { |
@@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | |||
1429 | emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); | 1738 | emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); |
1430 | break; | 1739 | break; |
1431 | } | 1740 | } |
1741 | return X86EMUL_CONTINUE; | ||
1432 | } | 1742 | } |
1433 | 1743 | ||
1434 | static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | 1744 | static int em_grp3(struct x86_emulate_ctxt *ctxt) |
1435 | struct x86_emulate_ops *ops) | ||
1436 | { | 1745 | { |
1437 | struct decode_cache *c = &ctxt->decode; | 1746 | struct decode_cache *c = &ctxt->decode; |
1438 | unsigned long *rax = &c->regs[VCPU_REGS_RAX]; | 1747 | unsigned long *rax = &c->regs[VCPU_REGS_RAX]; |
@@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1471 | return X86EMUL_CONTINUE; | 1780 | return X86EMUL_CONTINUE; |
1472 | } | 1781 | } |
1473 | 1782 | ||
1474 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1783 | static int em_grp45(struct x86_emulate_ctxt *ctxt) |
1475 | struct x86_emulate_ops *ops) | ||
1476 | { | 1784 | { |
1477 | struct decode_cache *c = &ctxt->decode; | 1785 | struct decode_cache *c = &ctxt->decode; |
1786 | int rc = X86EMUL_CONTINUE; | ||
1478 | 1787 | ||
1479 | switch (c->modrm_reg) { | 1788 | switch (c->modrm_reg) { |
1480 | case 0: /* inc */ | 1789 | case 0: /* inc */ |
@@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1488 | old_eip = c->eip; | 1797 | old_eip = c->eip; |
1489 | c->eip = c->src.val; | 1798 | c->eip = c->src.val; |
1490 | c->src.val = old_eip; | 1799 | c->src.val = old_eip; |
1491 | emulate_push(ctxt, ops); | 1800 | rc = em_push(ctxt); |
1492 | break; | 1801 | break; |
1493 | } | 1802 | } |
1494 | case 4: /* jmp abs */ | 1803 | case 4: /* jmp abs */ |
1495 | c->eip = c->src.val; | 1804 | c->eip = c->src.val; |
1496 | break; | 1805 | break; |
1806 | case 5: /* jmp far */ | ||
1807 | rc = em_jmp_far(ctxt); | ||
1808 | break; | ||
1497 | case 6: /* push */ | 1809 | case 6: /* push */ |
1498 | emulate_push(ctxt, ops); | 1810 | rc = em_push(ctxt); |
1499 | break; | 1811 | break; |
1500 | } | 1812 | } |
1501 | return X86EMUL_CONTINUE; | 1813 | return rc; |
1502 | } | 1814 | } |
1503 | 1815 | ||
1504 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1816 | static int em_grp9(struct x86_emulate_ctxt *ctxt) |
1505 | struct x86_emulate_ops *ops) | ||
1506 | { | 1817 | { |
1507 | struct decode_cache *c = &ctxt->decode; | 1818 | struct decode_cache *c = &ctxt->decode; |
1508 | u64 old = c->dst.orig_val64; | 1819 | u64 old = c->dst.orig_val64; |
@@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1528 | int rc; | 1839 | int rc; |
1529 | unsigned long cs; | 1840 | unsigned long cs; |
1530 | 1841 | ||
1531 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1842 | rc = emulate_pop(ctxt, &c->eip, c->op_bytes); |
1532 | if (rc != X86EMUL_CONTINUE) | 1843 | if (rc != X86EMUL_CONTINUE) |
1533 | return rc; | 1844 | return rc; |
1534 | if (c->op_bytes == 4) | 1845 | if (c->op_bytes == 4) |
1535 | c->eip = (u32)c->eip; | 1846 | c->eip = (u32)c->eip; |
1536 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1847 | rc = emulate_pop(ctxt, &cs, c->op_bytes); |
1537 | if (rc != X86EMUL_CONTINUE) | 1848 | if (rc != X86EMUL_CONTINUE) |
1538 | return rc; | 1849 | return rc; |
1539 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); | 1850 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
@@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1562 | struct x86_emulate_ops *ops, struct desc_struct *cs, | 1873 | struct x86_emulate_ops *ops, struct desc_struct *cs, |
1563 | struct desc_struct *ss) | 1874 | struct desc_struct *ss) |
1564 | { | 1875 | { |
1876 | u16 selector; | ||
1877 | |||
1565 | memset(cs, 0, sizeof(struct desc_struct)); | 1878 | memset(cs, 0, sizeof(struct desc_struct)); |
1566 | ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); | 1879 | ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); |
1567 | memset(ss, 0, sizeof(struct desc_struct)); | 1880 | memset(ss, 0, sizeof(struct desc_struct)); |
1568 | 1881 | ||
1569 | cs->l = 0; /* will be adjusted later */ | 1882 | cs->l = 0; /* will be adjusted later */ |
@@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1593 | struct desc_struct cs, ss; | 1906 | struct desc_struct cs, ss; |
1594 | u64 msr_data; | 1907 | u64 msr_data; |
1595 | u16 cs_sel, ss_sel; | 1908 | u16 cs_sel, ss_sel; |
1909 | u64 efer = 0; | ||
1596 | 1910 | ||
1597 | /* syscall is not available in real mode */ | 1911 | /* syscall is not available in real mode */ |
1598 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1912 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1599 | ctxt->mode == X86EMUL_MODE_VM86) | 1913 | ctxt->mode == X86EMUL_MODE_VM86) |
1600 | return emulate_ud(ctxt); | 1914 | return emulate_ud(ctxt); |
1601 | 1915 | ||
1916 | ops->get_msr(ctxt, MSR_EFER, &efer); | ||
1602 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1917 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1603 | 1918 | ||
1604 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1919 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
1605 | msr_data >>= 32; | 1920 | msr_data >>= 32; |
1606 | cs_sel = (u16)(msr_data & 0xfffc); | 1921 | cs_sel = (u16)(msr_data & 0xfffc); |
1607 | ss_sel = (u16)(msr_data + 8); | 1922 | ss_sel = (u16)(msr_data + 8); |
1608 | 1923 | ||
1609 | if (is_long_mode(ctxt->vcpu)) { | 1924 | if (efer & EFER_LMA) { |
1610 | cs.d = 0; | 1925 | cs.d = 0; |
1611 | cs.l = 1; | 1926 | cs.l = 1; |
1612 | } | 1927 | } |
1613 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); | 1928 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1614 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1929 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1615 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); | ||
1616 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1617 | 1930 | ||
1618 | c->regs[VCPU_REGS_RCX] = c->eip; | 1931 | c->regs[VCPU_REGS_RCX] = c->eip; |
1619 | if (is_long_mode(ctxt->vcpu)) { | 1932 | if (efer & EFER_LMA) { |
1620 | #ifdef CONFIG_X86_64 | 1933 | #ifdef CONFIG_X86_64 |
1621 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 1934 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; |
1622 | 1935 | ||
1623 | ops->get_msr(ctxt->vcpu, | 1936 | ops->get_msr(ctxt, |
1624 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 1937 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
1625 | MSR_LSTAR : MSR_CSTAR, &msr_data); | 1938 | MSR_LSTAR : MSR_CSTAR, &msr_data); |
1626 | c->eip = msr_data; | 1939 | c->eip = msr_data; |
1627 | 1940 | ||
1628 | ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); | 1941 | ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); |
1629 | ctxt->eflags &= ~(msr_data | EFLG_RF); | 1942 | ctxt->eflags &= ~(msr_data | EFLG_RF); |
1630 | #endif | 1943 | #endif |
1631 | } else { | 1944 | } else { |
1632 | /* legacy mode */ | 1945 | /* legacy mode */ |
1633 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1946 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
1634 | c->eip = (u32)msr_data; | 1947 | c->eip = (u32)msr_data; |
1635 | 1948 | ||
1636 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1949 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
@@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1646 | struct desc_struct cs, ss; | 1959 | struct desc_struct cs, ss; |
1647 | u64 msr_data; | 1960 | u64 msr_data; |
1648 | u16 cs_sel, ss_sel; | 1961 | u16 cs_sel, ss_sel; |
1962 | u64 efer = 0; | ||
1649 | 1963 | ||
1964 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
1650 | /* inject #GP if in real mode */ | 1965 | /* inject #GP if in real mode */ |
1651 | if (ctxt->mode == X86EMUL_MODE_REAL) | 1966 | if (ctxt->mode == X86EMUL_MODE_REAL) |
1652 | return emulate_gp(ctxt, 0); | 1967 | return emulate_gp(ctxt, 0); |
@@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1659 | 1974 | ||
1660 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1661 | 1976 | ||
1662 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 1977 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); |
1663 | switch (ctxt->mode) { | 1978 | switch (ctxt->mode) { |
1664 | case X86EMUL_MODE_PROT32: | 1979 | case X86EMUL_MODE_PROT32: |
1665 | if ((msr_data & 0xfffc) == 0x0) | 1980 | if ((msr_data & 0xfffc) == 0x0) |
@@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1676 | cs_sel &= ~SELECTOR_RPL_MASK; | 1991 | cs_sel &= ~SELECTOR_RPL_MASK; |
1677 | ss_sel = cs_sel + 8; | 1992 | ss_sel = cs_sel + 8; |
1678 | ss_sel &= ~SELECTOR_RPL_MASK; | 1993 | ss_sel &= ~SELECTOR_RPL_MASK; |
1679 | if (ctxt->mode == X86EMUL_MODE_PROT64 | 1994 | if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { |
1680 | || is_long_mode(ctxt->vcpu)) { | ||
1681 | cs.d = 0; | 1995 | cs.d = 0; |
1682 | cs.l = 1; | 1996 | cs.l = 1; |
1683 | } | 1997 | } |
1684 | 1998 | ||
1685 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); | 1999 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1686 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 2000 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1687 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); | ||
1688 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1689 | 2001 | ||
1690 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 2002 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); |
1691 | c->eip = msr_data; | 2003 | c->eip = msr_data; |
1692 | 2004 | ||
1693 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 2005 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); |
1694 | c->regs[VCPU_REGS_RSP] = msr_data; | 2006 | c->regs[VCPU_REGS_RSP] = msr_data; |
1695 | 2007 | ||
1696 | return X86EMUL_CONTINUE; | 2008 | return X86EMUL_CONTINUE; |
@@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1719 | 2031 | ||
1720 | cs.dpl = 3; | 2032 | cs.dpl = 3; |
1721 | ss.dpl = 3; | 2033 | ss.dpl = 3; |
1722 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2034 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); |
1723 | switch (usermode) { | 2035 | switch (usermode) { |
1724 | case X86EMUL_MODE_PROT32: | 2036 | case X86EMUL_MODE_PROT32: |
1725 | cs_sel = (u16)(msr_data + 16); | 2037 | cs_sel = (u16)(msr_data + 16); |
@@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1739 | cs_sel |= SELECTOR_RPL_MASK; | 2051 | cs_sel |= SELECTOR_RPL_MASK; |
1740 | ss_sel |= SELECTOR_RPL_MASK; | 2052 | ss_sel |= SELECTOR_RPL_MASK; |
1741 | 2053 | ||
1742 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); | 2054 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1743 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 2055 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1744 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); | ||
1745 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1746 | 2056 | ||
1747 | c->eip = c->regs[VCPU_REGS_RDX]; | 2057 | c->eip = c->regs[VCPU_REGS_RDX]; |
1748 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; | 2058 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; |
@@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, | |||
1759 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2069 | if (ctxt->mode == X86EMUL_MODE_VM86) |
1760 | return true; | 2070 | return true; |
1761 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2071 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1762 | return ops->cpl(ctxt->vcpu) > iopl; | 2072 | return ops->cpl(ctxt) > iopl; |
1763 | } | 2073 | } |
1764 | 2074 | ||
1765 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2075 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
@@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1769 | struct desc_struct tr_seg; | 2079 | struct desc_struct tr_seg; |
1770 | u32 base3; | 2080 | u32 base3; |
1771 | int r; | 2081 | int r; |
1772 | u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; | 2082 | u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; |
1773 | unsigned mask = (1 << len) - 1; | 2083 | unsigned mask = (1 << len) - 1; |
1774 | unsigned long base; | 2084 | unsigned long base; |
1775 | 2085 | ||
1776 | ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); | 2086 | ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); |
1777 | if (!tr_seg.p) | 2087 | if (!tr_seg.p) |
1778 | return false; | 2088 | return false; |
1779 | if (desc_limit_scaled(&tr_seg) < 103) | 2089 | if (desc_limit_scaled(&tr_seg) < 103) |
@@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1782 | #ifdef CONFIG_X86_64 | 2092 | #ifdef CONFIG_X86_64 |
1783 | base |= ((u64)base3) << 32; | 2093 | base |= ((u64)base3) << 32; |
1784 | #endif | 2094 | #endif |
1785 | r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); | 2095 | r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); |
1786 | if (r != X86EMUL_CONTINUE) | 2096 | if (r != X86EMUL_CONTINUE) |
1787 | return false; | 2097 | return false; |
1788 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) | 2098 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
1789 | return false; | 2099 | return false; |
1790 | r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, | 2100 | r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); |
1791 | NULL); | ||
1792 | if (r != X86EMUL_CONTINUE) | 2101 | if (r != X86EMUL_CONTINUE) |
1793 | return false; | 2102 | return false; |
1794 | if ((perm >> bit_idx) & mask) | 2103 | if ((perm >> bit_idx) & mask) |
@@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | |||
1829 | tss->si = c->regs[VCPU_REGS_RSI]; | 2138 | tss->si = c->regs[VCPU_REGS_RSI]; |
1830 | tss->di = c->regs[VCPU_REGS_RDI]; | 2139 | tss->di = c->regs[VCPU_REGS_RDI]; |
1831 | 2140 | ||
1832 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | 2141 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
1833 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 2142 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
1834 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | 2143 | tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); |
1835 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | 2144 | tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); |
1836 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | 2145 | tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); |
1837 | } | 2146 | } |
1838 | 2147 | ||
1839 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | 2148 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, |
@@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
1858 | * SDM says that segment selectors are loaded before segment | 2167 | * SDM says that segment selectors are loaded before segment |
1859 | * descriptors | 2168 | * descriptors |
1860 | */ | 2169 | */ |
1861 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | 2170 | set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); |
1862 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | 2171 | set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); |
1863 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | 2172 | set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); |
1864 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | 2173 | set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); |
1865 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | 2174 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); |
1866 | 2175 | ||
1867 | /* | 2176 | /* |
1868 | * Now load segment descriptors. If a fault happens at this stage | 2177 | * Now load segment descriptors. If a fault happens at this stage |
@@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1896 | int ret; | 2205 | int ret; |
1897 | u32 new_tss_base = get_desc_base(new_desc); | 2206 | u32 new_tss_base = get_desc_base(new_desc); |
1898 | 2207 | ||
1899 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2208 | ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
1900 | &ctxt->exception); | 2209 | &ctxt->exception); |
1901 | if (ret != X86EMUL_CONTINUE) | 2210 | if (ret != X86EMUL_CONTINUE) |
1902 | /* FIXME: need to provide precise fault address */ | 2211 | /* FIXME: need to provide precise fault address */ |
@@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1904 | 2213 | ||
1905 | save_state_to_tss16(ctxt, ops, &tss_seg); | 2214 | save_state_to_tss16(ctxt, ops, &tss_seg); |
1906 | 2215 | ||
1907 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2216 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
1908 | &ctxt->exception); | 2217 | &ctxt->exception); |
1909 | if (ret != X86EMUL_CONTINUE) | 2218 | if (ret != X86EMUL_CONTINUE) |
1910 | /* FIXME: need to provide precise fault address */ | 2219 | /* FIXME: need to provide precise fault address */ |
1911 | return ret; | 2220 | return ret; |
1912 | 2221 | ||
1913 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2222 | ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, |
1914 | &ctxt->exception); | 2223 | &ctxt->exception); |
1915 | if (ret != X86EMUL_CONTINUE) | 2224 | if (ret != X86EMUL_CONTINUE) |
1916 | /* FIXME: need to provide precise fault address */ | 2225 | /* FIXME: need to provide precise fault address */ |
@@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1919 | if (old_tss_sel != 0xffff) { | 2228 | if (old_tss_sel != 0xffff) { |
1920 | tss_seg.prev_task_link = old_tss_sel; | 2229 | tss_seg.prev_task_link = old_tss_sel; |
1921 | 2230 | ||
1922 | ret = ops->write_std(new_tss_base, | 2231 | ret = ops->write_std(ctxt, new_tss_base, |
1923 | &tss_seg.prev_task_link, | 2232 | &tss_seg.prev_task_link, |
1924 | sizeof tss_seg.prev_task_link, | 2233 | sizeof tss_seg.prev_task_link, |
1925 | ctxt->vcpu, &ctxt->exception); | 2234 | &ctxt->exception); |
1926 | if (ret != X86EMUL_CONTINUE) | 2235 | if (ret != X86EMUL_CONTINUE) |
1927 | /* FIXME: need to provide precise fault address */ | 2236 | /* FIXME: need to provide precise fault address */ |
1928 | return ret; | 2237 | return ret; |
@@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | |||
1937 | { | 2246 | { |
1938 | struct decode_cache *c = &ctxt->decode; | 2247 | struct decode_cache *c = &ctxt->decode; |
1939 | 2248 | ||
1940 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | 2249 | tss->cr3 = ops->get_cr(ctxt, 3); |
1941 | tss->eip = c->eip; | 2250 | tss->eip = c->eip; |
1942 | tss->eflags = ctxt->eflags; | 2251 | tss->eflags = ctxt->eflags; |
1943 | tss->eax = c->regs[VCPU_REGS_RAX]; | 2252 | tss->eax = c->regs[VCPU_REGS_RAX]; |
@@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | |||
1949 | tss->esi = c->regs[VCPU_REGS_RSI]; | 2258 | tss->esi = c->regs[VCPU_REGS_RSI]; |
1950 | tss->edi = c->regs[VCPU_REGS_RDI]; | 2259 | tss->edi = c->regs[VCPU_REGS_RDI]; |
1951 | 2260 | ||
1952 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | 2261 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
1953 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 2262 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
1954 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | 2263 | tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); |
1955 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | 2264 | tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); |
1956 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | 2265 | tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); |
1957 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | 2266 | tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); |
1958 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | 2267 | tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); |
1959 | } | 2268 | } |
1960 | 2269 | ||
1961 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | 2270 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, |
@@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
1965 | struct decode_cache *c = &ctxt->decode; | 2274 | struct decode_cache *c = &ctxt->decode; |
1966 | int ret; | 2275 | int ret; |
1967 | 2276 | ||
1968 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) | 2277 | if (ops->set_cr(ctxt, 3, tss->cr3)) |
1969 | return emulate_gp(ctxt, 0); | 2278 | return emulate_gp(ctxt, 0); |
1970 | c->eip = tss->eip; | 2279 | c->eip = tss->eip; |
1971 | ctxt->eflags = tss->eflags | 2; | 2280 | ctxt->eflags = tss->eflags | 2; |
@@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
1982 | * SDM says that segment selectors are loaded before segment | 2291 | * SDM says that segment selectors are loaded before segment |
1983 | * descriptors | 2292 | * descriptors |
1984 | */ | 2293 | */ |
1985 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | 2294 | set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); |
1986 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | 2295 | set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); |
1987 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | 2296 | set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); |
1988 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | 2297 | set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); |
1989 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | 2298 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); |
1990 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | 2299 | set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); |
1991 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | 2300 | set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); |
1992 | 2301 | ||
1993 | /* | 2302 | /* |
1994 | * Now load segment descriptors. If a fault happens at this stage | 2303 | * Now load segment descriptors. If a fault happens at this stage |
@@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2028 | int ret; | 2337 | int ret; |
2029 | u32 new_tss_base = get_desc_base(new_desc); | 2338 | u32 new_tss_base = get_desc_base(new_desc); |
2030 | 2339 | ||
2031 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2340 | ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2032 | &ctxt->exception); | 2341 | &ctxt->exception); |
2033 | if (ret != X86EMUL_CONTINUE) | 2342 | if (ret != X86EMUL_CONTINUE) |
2034 | /* FIXME: need to provide precise fault address */ | 2343 | /* FIXME: need to provide precise fault address */ |
@@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2036 | 2345 | ||
2037 | save_state_to_tss32(ctxt, ops, &tss_seg); | 2346 | save_state_to_tss32(ctxt, ops, &tss_seg); |
2038 | 2347 | ||
2039 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2348 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2040 | &ctxt->exception); | 2349 | &ctxt->exception); |
2041 | if (ret != X86EMUL_CONTINUE) | 2350 | if (ret != X86EMUL_CONTINUE) |
2042 | /* FIXME: need to provide precise fault address */ | 2351 | /* FIXME: need to provide precise fault address */ |
2043 | return ret; | 2352 | return ret; |
2044 | 2353 | ||
2045 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2354 | ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, |
2046 | &ctxt->exception); | 2355 | &ctxt->exception); |
2047 | if (ret != X86EMUL_CONTINUE) | 2356 | if (ret != X86EMUL_CONTINUE) |
2048 | /* FIXME: need to provide precise fault address */ | 2357 | /* FIXME: need to provide precise fault address */ |
@@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2051 | if (old_tss_sel != 0xffff) { | 2360 | if (old_tss_sel != 0xffff) { |
2052 | tss_seg.prev_task_link = old_tss_sel; | 2361 | tss_seg.prev_task_link = old_tss_sel; |
2053 | 2362 | ||
2054 | ret = ops->write_std(new_tss_base, | 2363 | ret = ops->write_std(ctxt, new_tss_base, |
2055 | &tss_seg.prev_task_link, | 2364 | &tss_seg.prev_task_link, |
2056 | sizeof tss_seg.prev_task_link, | 2365 | sizeof tss_seg.prev_task_link, |
2057 | ctxt->vcpu, &ctxt->exception); | 2366 | &ctxt->exception); |
2058 | if (ret != X86EMUL_CONTINUE) | 2367 | if (ret != X86EMUL_CONTINUE) |
2059 | /* FIXME: need to provide precise fault address */ | 2368 | /* FIXME: need to provide precise fault address */ |
2060 | return ret; | 2369 | return ret; |
@@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2070 | { | 2379 | { |
2071 | struct desc_struct curr_tss_desc, next_tss_desc; | 2380 | struct desc_struct curr_tss_desc, next_tss_desc; |
2072 | int ret; | 2381 | int ret; |
2073 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | 2382 | u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); |
2074 | ulong old_tss_base = | 2383 | ulong old_tss_base = |
2075 | ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); | 2384 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); |
2076 | u32 desc_limit; | 2385 | u32 desc_limit; |
2077 | 2386 | ||
2078 | /* FIXME: old_tss_base == ~0 ? */ | 2387 | /* FIXME: old_tss_base == ~0 ? */ |
@@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2088 | 2397 | ||
2089 | if (reason != TASK_SWITCH_IRET) { | 2398 | if (reason != TASK_SWITCH_IRET) { |
2090 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2399 | if ((tss_selector & 3) > next_tss_desc.dpl || |
2091 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) | 2400 | ops->cpl(ctxt) > next_tss_desc.dpl) |
2092 | return emulate_gp(ctxt, 0); | 2401 | return emulate_gp(ctxt, 0); |
2093 | } | 2402 | } |
2094 | 2403 | ||
@@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2132 | &next_tss_desc); | 2441 | &next_tss_desc); |
2133 | } | 2442 | } |
2134 | 2443 | ||
2135 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | 2444 | ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); |
2136 | ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); | 2445 | ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); |
2137 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
2138 | 2446 | ||
2139 | if (has_error_code) { | 2447 | if (has_error_code) { |
2140 | struct decode_cache *c = &ctxt->decode; | 2448 | struct decode_cache *c = &ctxt->decode; |
@@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2142 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | 2450 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; |
2143 | c->lock_prefix = 0; | 2451 | c->lock_prefix = 0; |
2144 | c->src.val = (unsigned long) error_code; | 2452 | c->src.val = (unsigned long) error_code; |
2145 | emulate_push(ctxt, ops); | 2453 | ret = em_push(ctxt); |
2146 | } | 2454 | } |
2147 | 2455 | ||
2148 | return ret; | 2456 | return ret; |
@@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2162 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | 2470 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, |
2163 | has_error_code, error_code); | 2471 | has_error_code, error_code); |
2164 | 2472 | ||
2165 | if (rc == X86EMUL_CONTINUE) { | 2473 | if (rc == X86EMUL_CONTINUE) |
2166 | rc = writeback(ctxt, ops); | 2474 | ctxt->eip = c->eip; |
2167 | if (rc == X86EMUL_CONTINUE) | ||
2168 | ctxt->eip = c->eip; | ||
2169 | } | ||
2170 | 2475 | ||
2171 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2476 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
2172 | } | 2477 | } |
2173 | 2478 | ||
2174 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, | 2479 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, |
@@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, | |||
2182 | op->addr.mem.seg = seg; | 2487 | op->addr.mem.seg = seg; |
2183 | } | 2488 | } |
2184 | 2489 | ||
2185 | static int em_push(struct x86_emulate_ctxt *ctxt) | ||
2186 | { | ||
2187 | emulate_push(ctxt, ctxt->ops); | ||
2188 | return X86EMUL_CONTINUE; | ||
2189 | } | ||
2190 | |||
2191 | static int em_das(struct x86_emulate_ctxt *ctxt) | 2490 | static int em_das(struct x86_emulate_ctxt *ctxt) |
2192 | { | 2491 | { |
2193 | struct decode_cache *c = &ctxt->decode; | 2492 | struct decode_cache *c = &ctxt->decode; |
@@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) | |||
2234 | ulong old_eip; | 2533 | ulong old_eip; |
2235 | int rc; | 2534 | int rc; |
2236 | 2535 | ||
2237 | old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 2536 | old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
2238 | old_eip = c->eip; | 2537 | old_eip = c->eip; |
2239 | 2538 | ||
2240 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | 2539 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); |
@@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) | |||
2245 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | 2544 | memcpy(&c->eip, c->src.valptr, c->op_bytes); |
2246 | 2545 | ||
2247 | c->src.val = old_cs; | 2546 | c->src.val = old_cs; |
2248 | emulate_push(ctxt, ctxt->ops); | 2547 | rc = em_push(ctxt); |
2249 | rc = writeback(ctxt, ctxt->ops); | ||
2250 | if (rc != X86EMUL_CONTINUE) | 2548 | if (rc != X86EMUL_CONTINUE) |
2251 | return rc; | 2549 | return rc; |
2252 | 2550 | ||
2253 | c->src.val = old_eip; | 2551 | c->src.val = old_eip; |
2254 | emulate_push(ctxt, ctxt->ops); | 2552 | return em_push(ctxt); |
2255 | rc = writeback(ctxt, ctxt->ops); | ||
2256 | if (rc != X86EMUL_CONTINUE) | ||
2257 | return rc; | ||
2258 | |||
2259 | c->dst.type = OP_NONE; | ||
2260 | |||
2261 | return X86EMUL_CONTINUE; | ||
2262 | } | 2553 | } |
2263 | 2554 | ||
2264 | static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | 2555 | static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) |
@@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
2269 | c->dst.type = OP_REG; | 2560 | c->dst.type = OP_REG; |
2270 | c->dst.addr.reg = &c->eip; | 2561 | c->dst.addr.reg = &c->eip; |
2271 | c->dst.bytes = c->op_bytes; | 2562 | c->dst.bytes = c->op_bytes; |
2272 | rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); | 2563 | rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); |
2273 | if (rc != X86EMUL_CONTINUE) | 2564 | if (rc != X86EMUL_CONTINUE) |
2274 | return rc; | 2565 | return rc; |
2275 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); | 2566 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); |
2276 | return X86EMUL_CONTINUE; | 2567 | return X86EMUL_CONTINUE; |
2277 | } | 2568 | } |
2278 | 2569 | ||
2570 | static int em_add(struct x86_emulate_ctxt *ctxt) | ||
2571 | { | ||
2572 | struct decode_cache *c = &ctxt->decode; | ||
2573 | |||
2574 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | ||
2575 | return X86EMUL_CONTINUE; | ||
2576 | } | ||
2577 | |||
2578 | static int em_or(struct x86_emulate_ctxt *ctxt) | ||
2579 | { | ||
2580 | struct decode_cache *c = &ctxt->decode; | ||
2581 | |||
2582 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | ||
2583 | return X86EMUL_CONTINUE; | ||
2584 | } | ||
2585 | |||
2586 | static int em_adc(struct x86_emulate_ctxt *ctxt) | ||
2587 | { | ||
2588 | struct decode_cache *c = &ctxt->decode; | ||
2589 | |||
2590 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | ||
2591 | return X86EMUL_CONTINUE; | ||
2592 | } | ||
2593 | |||
2594 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | ||
2595 | { | ||
2596 | struct decode_cache *c = &ctxt->decode; | ||
2597 | |||
2598 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | ||
2599 | return X86EMUL_CONTINUE; | ||
2600 | } | ||
2601 | |||
2602 | static int em_and(struct x86_emulate_ctxt *ctxt) | ||
2603 | { | ||
2604 | struct decode_cache *c = &ctxt->decode; | ||
2605 | |||
2606 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | ||
2607 | return X86EMUL_CONTINUE; | ||
2608 | } | ||
2609 | |||
2610 | static int em_sub(struct x86_emulate_ctxt *ctxt) | ||
2611 | { | ||
2612 | struct decode_cache *c = &ctxt->decode; | ||
2613 | |||
2614 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | ||
2615 | return X86EMUL_CONTINUE; | ||
2616 | } | ||
2617 | |||
2618 | static int em_xor(struct x86_emulate_ctxt *ctxt) | ||
2619 | { | ||
2620 | struct decode_cache *c = &ctxt->decode; | ||
2621 | |||
2622 | emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); | ||
2623 | return X86EMUL_CONTINUE; | ||
2624 | } | ||
2625 | |||
2626 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | ||
2627 | { | ||
2628 | struct decode_cache *c = &ctxt->decode; | ||
2629 | |||
2630 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2631 | /* Disable writeback. */ | ||
2632 | c->dst.type = OP_NONE; | ||
2633 | return X86EMUL_CONTINUE; | ||
2634 | } | ||
2635 | |||
2279 | static int em_imul(struct x86_emulate_ctxt *ctxt) | 2636 | static int em_imul(struct x86_emulate_ctxt *ctxt) |
2280 | { | 2637 | { |
2281 | struct decode_cache *c = &ctxt->decode; | 2638 | struct decode_cache *c = &ctxt->decode; |
@@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) | |||
2306 | 2663 | ||
2307 | static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | 2664 | static int em_rdtsc(struct x86_emulate_ctxt *ctxt) |
2308 | { | 2665 | { |
2309 | unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); | ||
2310 | struct decode_cache *c = &ctxt->decode; | 2666 | struct decode_cache *c = &ctxt->decode; |
2311 | u64 tsc = 0; | 2667 | u64 tsc = 0; |
2312 | 2668 | ||
2313 | if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) | 2669 | ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); |
2314 | return emulate_gp(ctxt, 0); | ||
2315 | ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); | ||
2316 | c->regs[VCPU_REGS_RAX] = (u32)tsc; | 2670 | c->regs[VCPU_REGS_RAX] = (u32)tsc; |
2317 | c->regs[VCPU_REGS_RDX] = tsc >> 32; | 2671 | c->regs[VCPU_REGS_RDX] = tsc >> 32; |
2318 | return X86EMUL_CONTINUE; | 2672 | return X86EMUL_CONTINUE; |
@@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) | |||
2325 | return X86EMUL_CONTINUE; | 2679 | return X86EMUL_CONTINUE; |
2326 | } | 2680 | } |
2327 | 2681 | ||
2682 | static int em_movdqu(struct x86_emulate_ctxt *ctxt) | ||
2683 | { | ||
2684 | struct decode_cache *c = &ctxt->decode; | ||
2685 | memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); | ||
2686 | return X86EMUL_CONTINUE; | ||
2687 | } | ||
2688 | |||
2689 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) | ||
2690 | { | ||
2691 | struct decode_cache *c = &ctxt->decode; | ||
2692 | int rc; | ||
2693 | ulong linear; | ||
2694 | |||
2695 | rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); | ||
2696 | if (rc == X86EMUL_CONTINUE) | ||
2697 | ctxt->ops->invlpg(ctxt, linear); | ||
2698 | /* Disable writeback. */ | ||
2699 | c->dst.type = OP_NONE; | ||
2700 | return X86EMUL_CONTINUE; | ||
2701 | } | ||
2702 | |||
2703 | static int em_clts(struct x86_emulate_ctxt *ctxt) | ||
2704 | { | ||
2705 | ulong cr0; | ||
2706 | |||
2707 | cr0 = ctxt->ops->get_cr(ctxt, 0); | ||
2708 | cr0 &= ~X86_CR0_TS; | ||
2709 | ctxt->ops->set_cr(ctxt, 0, cr0); | ||
2710 | return X86EMUL_CONTINUE; | ||
2711 | } | ||
2712 | |||
2713 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) | ||
2714 | { | ||
2715 | struct decode_cache *c = &ctxt->decode; | ||
2716 | int rc; | ||
2717 | |||
2718 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
2719 | return X86EMUL_UNHANDLEABLE; | ||
2720 | |||
2721 | rc = ctxt->ops->fix_hypercall(ctxt); | ||
2722 | if (rc != X86EMUL_CONTINUE) | ||
2723 | return rc; | ||
2724 | |||
2725 | /* Let the processor re-execute the fixed hypercall */ | ||
2726 | c->eip = ctxt->eip; | ||
2727 | /* Disable writeback. */ | ||
2728 | c->dst.type = OP_NONE; | ||
2729 | return X86EMUL_CONTINUE; | ||
2730 | } | ||
2731 | |||
2732 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) | ||
2733 | { | ||
2734 | struct decode_cache *c = &ctxt->decode; | ||
2735 | struct desc_ptr desc_ptr; | ||
2736 | int rc; | ||
2737 | |||
2738 | rc = read_descriptor(ctxt, c->src.addr.mem, | ||
2739 | &desc_ptr.size, &desc_ptr.address, | ||
2740 | c->op_bytes); | ||
2741 | if (rc != X86EMUL_CONTINUE) | ||
2742 | return rc; | ||
2743 | ctxt->ops->set_gdt(ctxt, &desc_ptr); | ||
2744 | /* Disable writeback. */ | ||
2745 | c->dst.type = OP_NONE; | ||
2746 | return X86EMUL_CONTINUE; | ||
2747 | } | ||
2748 | |||
2749 | static int em_vmmcall(struct x86_emulate_ctxt *ctxt) | ||
2750 | { | ||
2751 | struct decode_cache *c = &ctxt->decode; | ||
2752 | int rc; | ||
2753 | |||
2754 | rc = ctxt->ops->fix_hypercall(ctxt); | ||
2755 | |||
2756 | /* Disable writeback. */ | ||
2757 | c->dst.type = OP_NONE; | ||
2758 | return rc; | ||
2759 | } | ||
2760 | |||
2761 | static int em_lidt(struct x86_emulate_ctxt *ctxt) | ||
2762 | { | ||
2763 | struct decode_cache *c = &ctxt->decode; | ||
2764 | struct desc_ptr desc_ptr; | ||
2765 | int rc; | ||
2766 | |||
2767 | rc = read_descriptor(ctxt, c->src.addr.mem, | ||
2768 | &desc_ptr.size, &desc_ptr.address, | ||
2769 | c->op_bytes); | ||
2770 | if (rc != X86EMUL_CONTINUE) | ||
2771 | return rc; | ||
2772 | ctxt->ops->set_idt(ctxt, &desc_ptr); | ||
2773 | /* Disable writeback. */ | ||
2774 | c->dst.type = OP_NONE; | ||
2775 | return X86EMUL_CONTINUE; | ||
2776 | } | ||
2777 | |||
2778 | static int em_smsw(struct x86_emulate_ctxt *ctxt) | ||
2779 | { | ||
2780 | struct decode_cache *c = &ctxt->decode; | ||
2781 | |||
2782 | c->dst.bytes = 2; | ||
2783 | c->dst.val = ctxt->ops->get_cr(ctxt, 0); | ||
2784 | return X86EMUL_CONTINUE; | ||
2785 | } | ||
2786 | |||
2787 | static int em_lmsw(struct x86_emulate_ctxt *ctxt) | ||
2788 | { | ||
2789 | struct decode_cache *c = &ctxt->decode; | ||
2790 | ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) | ||
2791 | | (c->src.val & 0x0f)); | ||
2792 | c->dst.type = OP_NONE; | ||
2793 | return X86EMUL_CONTINUE; | ||
2794 | } | ||
2795 | |||
2796 | static bool valid_cr(int nr) | ||
2797 | { | ||
2798 | switch (nr) { | ||
2799 | case 0: | ||
2800 | case 2 ... 4: | ||
2801 | case 8: | ||
2802 | return true; | ||
2803 | default: | ||
2804 | return false; | ||
2805 | } | ||
2806 | } | ||
2807 | |||
2808 | static int check_cr_read(struct x86_emulate_ctxt *ctxt) | ||
2809 | { | ||
2810 | struct decode_cache *c = &ctxt->decode; | ||
2811 | |||
2812 | if (!valid_cr(c->modrm_reg)) | ||
2813 | return emulate_ud(ctxt); | ||
2814 | |||
2815 | return X86EMUL_CONTINUE; | ||
2816 | } | ||
2817 | |||
2818 | static int check_cr_write(struct x86_emulate_ctxt *ctxt) | ||
2819 | { | ||
2820 | struct decode_cache *c = &ctxt->decode; | ||
2821 | u64 new_val = c->src.val64; | ||
2822 | int cr = c->modrm_reg; | ||
2823 | u64 efer = 0; | ||
2824 | |||
2825 | static u64 cr_reserved_bits[] = { | ||
2826 | 0xffffffff00000000ULL, | ||
2827 | 0, 0, 0, /* CR3 checked later */ | ||
2828 | CR4_RESERVED_BITS, | ||
2829 | 0, 0, 0, | ||
2830 | CR8_RESERVED_BITS, | ||
2831 | }; | ||
2832 | |||
2833 | if (!valid_cr(cr)) | ||
2834 | return emulate_ud(ctxt); | ||
2835 | |||
2836 | if (new_val & cr_reserved_bits[cr]) | ||
2837 | return emulate_gp(ctxt, 0); | ||
2838 | |||
2839 | switch (cr) { | ||
2840 | case 0: { | ||
2841 | u64 cr4; | ||
2842 | if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || | ||
2843 | ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) | ||
2844 | return emulate_gp(ctxt, 0); | ||
2845 | |||
2846 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2847 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2848 | |||
2849 | if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && | ||
2850 | !(cr4 & X86_CR4_PAE)) | ||
2851 | return emulate_gp(ctxt, 0); | ||
2852 | |||
2853 | break; | ||
2854 | } | ||
2855 | case 3: { | ||
2856 | u64 rsvd = 0; | ||
2857 | |||
2858 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2859 | if (efer & EFER_LMA) | ||
2860 | rsvd = CR3_L_MODE_RESERVED_BITS; | ||
2861 | else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) | ||
2862 | rsvd = CR3_PAE_RESERVED_BITS; | ||
2863 | else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) | ||
2864 | rsvd = CR3_NONPAE_RESERVED_BITS; | ||
2865 | |||
2866 | if (new_val & rsvd) | ||
2867 | return emulate_gp(ctxt, 0); | ||
2868 | |||
2869 | break; | ||
2870 | } | ||
2871 | case 4: { | ||
2872 | u64 cr4; | ||
2873 | |||
2874 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2875 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2876 | |||
2877 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) | ||
2878 | return emulate_gp(ctxt, 0); | ||
2879 | |||
2880 | break; | ||
2881 | } | ||
2882 | } | ||
2883 | |||
2884 | return X86EMUL_CONTINUE; | ||
2885 | } | ||
2886 | |||
2887 | static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) | ||
2888 | { | ||
2889 | unsigned long dr7; | ||
2890 | |||
2891 | ctxt->ops->get_dr(ctxt, 7, &dr7); | ||
2892 | |||
2893 | /* Check if DR7.Global_Enable is set */ | ||
2894 | return dr7 & (1 << 13); | ||
2895 | } | ||
2896 | |||
2897 | static int check_dr_read(struct x86_emulate_ctxt *ctxt) | ||
2898 | { | ||
2899 | struct decode_cache *c = &ctxt->decode; | ||
2900 | int dr = c->modrm_reg; | ||
2901 | u64 cr4; | ||
2902 | |||
2903 | if (dr > 7) | ||
2904 | return emulate_ud(ctxt); | ||
2905 | |||
2906 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2907 | if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) | ||
2908 | return emulate_ud(ctxt); | ||
2909 | |||
2910 | if (check_dr7_gd(ctxt)) | ||
2911 | return emulate_db(ctxt); | ||
2912 | |||
2913 | return X86EMUL_CONTINUE; | ||
2914 | } | ||
2915 | |||
2916 | static int check_dr_write(struct x86_emulate_ctxt *ctxt) | ||
2917 | { | ||
2918 | struct decode_cache *c = &ctxt->decode; | ||
2919 | u64 new_val = c->src.val64; | ||
2920 | int dr = c->modrm_reg; | ||
2921 | |||
2922 | if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) | ||
2923 | return emulate_gp(ctxt, 0); | ||
2924 | |||
2925 | return check_dr_read(ctxt); | ||
2926 | } | ||
2927 | |||
2928 | static int check_svme(struct x86_emulate_ctxt *ctxt) | ||
2929 | { | ||
2930 | u64 efer; | ||
2931 | |||
2932 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2933 | |||
2934 | if (!(efer & EFER_SVME)) | ||
2935 | return emulate_ud(ctxt); | ||
2936 | |||
2937 | return X86EMUL_CONTINUE; | ||
2938 | } | ||
2939 | |||
2940 | static int check_svme_pa(struct x86_emulate_ctxt *ctxt) | ||
2941 | { | ||
2942 | u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; | ||
2943 | |||
2944 | /* Valid physical address? */ | ||
2945 | if (rax & 0xffff000000000000ULL) | ||
2946 | return emulate_gp(ctxt, 0); | ||
2947 | |||
2948 | return check_svme(ctxt); | ||
2949 | } | ||
2950 | |||
2951 | static int check_rdtsc(struct x86_emulate_ctxt *ctxt) | ||
2952 | { | ||
2953 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2954 | |||
2955 | if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) | ||
2956 | return emulate_ud(ctxt); | ||
2957 | |||
2958 | return X86EMUL_CONTINUE; | ||
2959 | } | ||
2960 | |||
2961 | static int check_rdpmc(struct x86_emulate_ctxt *ctxt) | ||
2962 | { | ||
2963 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2964 | u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; | ||
2965 | |||
2966 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || | ||
2967 | (rcx > 3)) | ||
2968 | return emulate_gp(ctxt, 0); | ||
2969 | |||
2970 | return X86EMUL_CONTINUE; | ||
2971 | } | ||
2972 | |||
2973 | static int check_perm_in(struct x86_emulate_ctxt *ctxt) | ||
2974 | { | ||
2975 | struct decode_cache *c = &ctxt->decode; | ||
2976 | |||
2977 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2978 | if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) | ||
2979 | return emulate_gp(ctxt, 0); | ||
2980 | |||
2981 | return X86EMUL_CONTINUE; | ||
2982 | } | ||
2983 | |||
2984 | static int check_perm_out(struct x86_emulate_ctxt *ctxt) | ||
2985 | { | ||
2986 | struct decode_cache *c = &ctxt->decode; | ||
2987 | |||
2988 | c->src.bytes = min(c->src.bytes, 4u); | ||
2989 | if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) | ||
2990 | return emulate_gp(ctxt, 0); | ||
2991 | |||
2992 | return X86EMUL_CONTINUE; | ||
2993 | } | ||
2994 | |||
2328 | #define D(_y) { .flags = (_y) } | 2995 | #define D(_y) { .flags = (_y) } |
2996 | #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } | ||
2997 | #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ | ||
2998 | .check_perm = (_p) } | ||
2329 | #define N D(0) | 2999 | #define N D(0) |
3000 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | ||
2330 | #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } | 3001 | #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } |
2331 | #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } | 3002 | #define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } |
2332 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } | 3003 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } |
3004 | #define II(_f, _e, _i) \ | ||
3005 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } | ||
3006 | #define IIP(_f, _e, _i, _p) \ | ||
3007 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ | ||
3008 | .check_perm = (_p) } | ||
3009 | #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } | ||
2333 | 3010 | ||
2334 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3011 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
3012 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | ||
2335 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3013 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
2336 | 3014 | ||
2337 | #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ | 3015 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
2338 | D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ | 3016 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
2339 | D2bv(((_f) & ~Lock) | DstAcc | SrcImm) | 3017 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
2340 | 3018 | ||
3019 | static struct opcode group7_rm1[] = { | ||
3020 | DI(SrcNone | ModRM | Priv, monitor), | ||
3021 | DI(SrcNone | ModRM | Priv, mwait), | ||
3022 | N, N, N, N, N, N, | ||
3023 | }; | ||
3024 | |||
3025 | static struct opcode group7_rm3[] = { | ||
3026 | DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), | ||
3027 | II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), | ||
3028 | DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), | ||
3029 | DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), | ||
3030 | DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), | ||
3031 | DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), | ||
3032 | DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), | ||
3033 | DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), | ||
3034 | }; | ||
3035 | |||
3036 | static struct opcode group7_rm7[] = { | ||
3037 | N, | ||
3038 | DIP(SrcNone | ModRM, rdtscp, check_rdtsc), | ||
3039 | N, N, N, N, N, N, | ||
3040 | }; | ||
2341 | 3041 | ||
2342 | static struct opcode group1[] = { | 3042 | static struct opcode group1[] = { |
2343 | X7(D(Lock)), N | 3043 | I(Lock, em_add), |
3044 | I(Lock, em_or), | ||
3045 | I(Lock, em_adc), | ||
3046 | I(Lock, em_sbb), | ||
3047 | I(Lock, em_and), | ||
3048 | I(Lock, em_sub), | ||
3049 | I(Lock, em_xor), | ||
3050 | I(0, em_cmp), | ||
2344 | }; | 3051 | }; |
2345 | 3052 | ||
2346 | static struct opcode group1A[] = { | 3053 | static struct opcode group1A[] = { |
@@ -2366,16 +3073,28 @@ static struct opcode group5[] = { | |||
2366 | D(SrcMem | ModRM | Stack), N, | 3073 | D(SrcMem | ModRM | Stack), N, |
2367 | }; | 3074 | }; |
2368 | 3075 | ||
3076 | static struct opcode group6[] = { | ||
3077 | DI(ModRM | Prot, sldt), | ||
3078 | DI(ModRM | Prot, str), | ||
3079 | DI(ModRM | Prot | Priv, lldt), | ||
3080 | DI(ModRM | Prot | Priv, ltr), | ||
3081 | N, N, N, N, | ||
3082 | }; | ||
3083 | |||
2369 | static struct group_dual group7 = { { | 3084 | static struct group_dual group7 = { { |
2370 | N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), | 3085 | DI(ModRM | Mov | DstMem | Priv, sgdt), |
2371 | D(SrcNone | ModRM | DstMem | Mov), N, | 3086 | DI(ModRM | Mov | DstMem | Priv, sidt), |
2372 | D(SrcMem16 | ModRM | Mov | Priv), | 3087 | II(ModRM | SrcMem | Priv, em_lgdt, lgdt), |
2373 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), | 3088 | II(ModRM | SrcMem | Priv, em_lidt, lidt), |
3089 | II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, | ||
3090 | II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), | ||
3091 | II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), | ||
2374 | }, { | 3092 | }, { |
2375 | D(SrcNone | ModRM | Priv | VendorSpecific), N, | 3093 | I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), |
2376 | N, D(SrcNone | ModRM | Priv | VendorSpecific), | 3094 | EXT(0, group7_rm1), |
2377 | D(SrcNone | ModRM | DstMem | Mov), N, | 3095 | N, EXT(0, group7_rm3), |
2378 | D(SrcMem16 | ModRM | Mov | Priv), N, | 3096 | II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, |
3097 | II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), | ||
2379 | } }; | 3098 | } }; |
2380 | 3099 | ||
2381 | static struct opcode group8[] = { | 3100 | static struct opcode group8[] = { |
@@ -2394,35 +3113,40 @@ static struct opcode group11[] = { | |||
2394 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), | 3113 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), |
2395 | }; | 3114 | }; |
2396 | 3115 | ||
3116 | static struct gprefix pfx_0f_6f_0f_7f = { | ||
3117 | N, N, N, I(Sse, em_movdqu), | ||
3118 | }; | ||
3119 | |||
2397 | static struct opcode opcode_table[256] = { | 3120 | static struct opcode opcode_table[256] = { |
2398 | /* 0x00 - 0x07 */ | 3121 | /* 0x00 - 0x07 */ |
2399 | D6ALU(Lock), | 3122 | I6ALU(Lock, em_add), |
2400 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3123 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), |
2401 | /* 0x08 - 0x0F */ | 3124 | /* 0x08 - 0x0F */ |
2402 | D6ALU(Lock), | 3125 | I6ALU(Lock, em_or), |
2403 | D(ImplicitOps | Stack | No64), N, | 3126 | D(ImplicitOps | Stack | No64), N, |
2404 | /* 0x10 - 0x17 */ | 3127 | /* 0x10 - 0x17 */ |
2405 | D6ALU(Lock), | 3128 | I6ALU(Lock, em_adc), |
2406 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3129 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), |
2407 | /* 0x18 - 0x1F */ | 3130 | /* 0x18 - 0x1F */ |
2408 | D6ALU(Lock), | 3131 | I6ALU(Lock, em_sbb), |
2409 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3132 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), |
2410 | /* 0x20 - 0x27 */ | 3133 | /* 0x20 - 0x27 */ |
2411 | D6ALU(Lock), N, N, | 3134 | I6ALU(Lock, em_and), N, N, |
2412 | /* 0x28 - 0x2F */ | 3135 | /* 0x28 - 0x2F */ |
2413 | D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), | 3136 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
2414 | /* 0x30 - 0x37 */ | 3137 | /* 0x30 - 0x37 */ |
2415 | D6ALU(Lock), N, N, | 3138 | I6ALU(Lock, em_xor), N, N, |
2416 | /* 0x38 - 0x3F */ | 3139 | /* 0x38 - 0x3F */ |
2417 | D6ALU(0), N, N, | 3140 | I6ALU(0, em_cmp), N, N, |
2418 | /* 0x40 - 0x4F */ | 3141 | /* 0x40 - 0x4F */ |
2419 | X16(D(DstReg)), | 3142 | X16(D(DstReg)), |
2420 | /* 0x50 - 0x57 */ | 3143 | /* 0x50 - 0x57 */ |
2421 | X8(I(SrcReg | Stack, em_push)), | 3144 | X8(I(SrcReg | Stack, em_push)), |
2422 | /* 0x58 - 0x5F */ | 3145 | /* 0x58 - 0x5F */ |
2423 | X8(D(DstReg | Stack)), | 3146 | X8(I(DstReg | Stack, em_pop)), |
2424 | /* 0x60 - 0x67 */ | 3147 | /* 0x60 - 0x67 */ |
2425 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3148 | I(ImplicitOps | Stack | No64, em_pusha), |
3149 | I(ImplicitOps | Stack | No64, em_popa), | ||
2426 | N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , | 3150 | N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , |
2427 | N, N, N, N, | 3151 | N, N, N, N, |
2428 | /* 0x68 - 0x6F */ | 3152 | /* 0x68 - 0x6F */ |
@@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = { | |||
2430 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), | 3154 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), |
2431 | I(SrcImmByte | Mov | Stack, em_push), | 3155 | I(SrcImmByte | Mov | Stack, em_push), |
2432 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), | 3156 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), |
2433 | D2bv(DstDI | Mov | String), /* insb, insw/insd */ | 3157 | D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ |
2434 | D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ | 3158 | D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ |
2435 | /* 0x70 - 0x7F */ | 3159 | /* 0x70 - 0x7F */ |
2436 | X16(D(SrcImmByte)), | 3160 | X16(D(SrcImmByte)), |
2437 | /* 0x80 - 0x87 */ | 3161 | /* 0x80 - 0x87 */ |
@@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = { | |||
2446 | D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), | 3170 | D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), |
2447 | D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), | 3171 | D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), |
2448 | /* 0x90 - 0x97 */ | 3172 | /* 0x90 - 0x97 */ |
2449 | X8(D(SrcAcc | DstReg)), | 3173 | DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), |
2450 | /* 0x98 - 0x9F */ | 3174 | /* 0x98 - 0x9F */ |
2451 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), | 3175 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), |
2452 | I(SrcImmFAddr | No64, em_call_far), N, | 3176 | I(SrcImmFAddr | No64, em_call_far), N, |
2453 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, | 3177 | II(ImplicitOps | Stack, em_pushf, pushf), |
3178 | II(ImplicitOps | Stack, em_popf, popf), N, N, | ||
2454 | /* 0xA0 - 0xA7 */ | 3179 | /* 0xA0 - 0xA7 */ |
2455 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3180 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
2456 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), | 3181 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), |
2457 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3182 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
2458 | D2bv(SrcSI | DstDI | String), | 3183 | I2bv(SrcSI | DstDI | String, em_cmp), |
2459 | /* 0xA8 - 0xAF */ | 3184 | /* 0xA8 - 0xAF */ |
2460 | D2bv(DstAcc | SrcImm), | 3185 | D2bv(DstAcc | SrcImm), |
2461 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), | 3186 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), |
2462 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), | 3187 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), |
2463 | D2bv(SrcAcc | DstDI | String), | 3188 | I2bv(SrcAcc | DstDI | String, em_cmp), |
2464 | /* 0xB0 - 0xB7 */ | 3189 | /* 0xB0 - 0xB7 */ |
2465 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), | 3190 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), |
2466 | /* 0xB8 - 0xBF */ | 3191 | /* 0xB8 - 0xBF */ |
@@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = { | |||
2473 | G(ByteOp, group11), G(0, group11), | 3198 | G(ByteOp, group11), G(0, group11), |
2474 | /* 0xC8 - 0xCF */ | 3199 | /* 0xC8 - 0xCF */ |
2475 | N, N, N, D(ImplicitOps | Stack), | 3200 | N, N, N, D(ImplicitOps | Stack), |
2476 | D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), | 3201 | D(ImplicitOps), DI(SrcImmByte, intn), |
3202 | D(ImplicitOps | No64), DI(ImplicitOps, iret), | ||
2477 | /* 0xD0 - 0xD7 */ | 3203 | /* 0xD0 - 0xD7 */ |
2478 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), | 3204 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), |
2479 | N, N, N, N, | 3205 | N, N, N, N, |
@@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = { | |||
2481 | N, N, N, N, N, N, N, N, | 3207 | N, N, N, N, N, N, N, N, |
2482 | /* 0xE0 - 0xE7 */ | 3208 | /* 0xE0 - 0xE7 */ |
2483 | X4(D(SrcImmByte)), | 3209 | X4(D(SrcImmByte)), |
2484 | D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), | 3210 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), |
3211 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), | ||
2485 | /* 0xE8 - 0xEF */ | 3212 | /* 0xE8 - 0xEF */ |
2486 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), | 3213 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), |
2487 | D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), | 3214 | D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), |
2488 | D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), | 3215 | D2bvIP(SrcNone | DstAcc, in, check_perm_in), |
3216 | D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), | ||
2489 | /* 0xF0 - 0xF7 */ | 3217 | /* 0xF0 - 0xF7 */ |
2490 | N, N, N, N, | 3218 | N, DI(ImplicitOps, icebp), N, N, |
2491 | D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), | 3219 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), |
3220 | G(ByteOp, group3), G(0, group3), | ||
2492 | /* 0xF8 - 0xFF */ | 3221 | /* 0xF8 - 0xFF */ |
2493 | D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), | 3222 | D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), |
2494 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), | 3223 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), |
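In the one-byte table above, the D6ALU(...) rows become I6ALU(..., em_add/em_or/...), so the six standard encodings of each ALU opcode (r/m,r and r,r/m in byte and word/long forms plus acc,imm) are serviced by one em_* handler found in the table, instead of falling through to the large `case 0x00 ... 0x05: add:` switch that is deleted later in this patch. A rough sketch of the table-driven idea, with simplified flags-free handlers:

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: one handler per operation, shared by all of its
 * encodings, as with I6ALU(Lock, em_add); names are invented. */
typedef uint32_t (*alu_fn)(uint32_t dst, uint32_t src);

static uint32_t do_add(uint32_t d, uint32_t s) { return d + s; }
static uint32_t do_or (uint32_t d, uint32_t s) { return d | s; }
static uint32_t do_and(uint32_t d, uint32_t s) { return d & s; }

struct alu_row { uint8_t base_opcode; alu_fn fn; };

/* 0x00..0x05 add, 0x08..0x0d or, 0x20..0x25 and, ... */
static const struct alu_row alu_table[] = {
    { 0x00, do_add }, { 0x08, do_or }, { 0x20, do_and },
};

static alu_fn lookup(uint8_t opcode)
{
    for (unsigned i = 0; i < sizeof(alu_table) / sizeof(alu_table[0]); i++)
        if (opcode - alu_table[i].base_opcode < 6u)   /* six encodings per op */
            return alu_table[i].fn;
    return NULL;
}

int main(void)
{
    alu_fn fn = lookup(0x01);                         /* add r/m32, r32 */
    printf("%u\n", fn ? fn(2, 3) : 0);
    return 0;
}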
@@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = { | |||
2496 | 3225 | ||
2497 | static struct opcode twobyte_table[256] = { | 3226 | static struct opcode twobyte_table[256] = { |
2498 | /* 0x00 - 0x0F */ | 3227 | /* 0x00 - 0x0F */ |
2499 | N, GD(0, &group7), N, N, | 3228 | G(0, group6), GD(0, &group7), N, N, |
2500 | N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, | 3229 | N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, |
2501 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, | 3230 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, |
2502 | N, D(ImplicitOps | ModRM), N, N, | 3231 | N, D(ImplicitOps | ModRM), N, N, |
2503 | /* 0x10 - 0x1F */ | 3232 | /* 0x10 - 0x1F */ |
2504 | N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, | 3233 | N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, |
2505 | /* 0x20 - 0x2F */ | 3234 | /* 0x20 - 0x2F */ |
2506 | D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), | 3235 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), |
2507 | D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), | 3236 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), |
3237 | DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), | ||
3238 | DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), | ||
2508 | N, N, N, N, | 3239 | N, N, N, N, |
2509 | N, N, N, N, N, N, N, N, | 3240 | N, N, N, N, N, N, N, N, |
2510 | /* 0x30 - 0x3F */ | 3241 | /* 0x30 - 0x3F */ |
2511 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), | 3242 | DI(ImplicitOps | Priv, wrmsr), |
2512 | D(ImplicitOps | Priv), N, | 3243 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
3244 | DI(ImplicitOps | Priv, rdmsr), | ||
3245 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), | ||
2513 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), | 3246 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), |
2514 | N, N, | 3247 | N, N, |
2515 | N, N, N, N, N, N, N, N, | 3248 | N, N, N, N, N, N, N, N, |
@@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = { | |||
2518 | /* 0x50 - 0x5F */ | 3251 | /* 0x50 - 0x5F */ |
2519 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3252 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, |
2520 | /* 0x60 - 0x6F */ | 3253 | /* 0x60 - 0x6F */ |
2521 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3254 | N, N, N, N, |
3255 | N, N, N, N, | ||
3256 | N, N, N, N, | ||
3257 | N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), | ||
2522 | /* 0x70 - 0x7F */ | 3258 | /* 0x70 - 0x7F */ |
2523 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3259 | N, N, N, N, |
3260 | N, N, N, N, | ||
3261 | N, N, N, N, | ||
3262 | N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), | ||
2524 | /* 0x80 - 0x8F */ | 3263 | /* 0x80 - 0x8F */ |
2525 | X16(D(SrcImm)), | 3264 | X16(D(SrcImm)), |
2526 | /* 0x90 - 0x9F */ | 3265 | /* 0x90 - 0x9F */ |
2527 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3266 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
2528 | /* 0xA0 - 0xA7 */ | 3267 | /* 0xA0 - 0xA7 */ |
2529 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3268 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), |
2530 | N, D(DstMem | SrcReg | ModRM | BitOp), | 3269 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), |
2531 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3270 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
2532 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3271 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
2533 | /* 0xA8 - 0xAF */ | 3272 | /* 0xA8 - 0xAF */ |
2534 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3273 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), |
2535 | N, D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3274 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), |
2536 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3275 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
2537 | D(DstMem | SrcReg | Src2CL | ModRM), | 3276 | D(DstMem | SrcReg | Src2CL | ModRM), |
2538 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3277 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
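The two-byte table above now tags the privileged rows (cr/dr moves, wrmsr/rdmsr, rdtsc, rdpmc) with DIP/IIP entries naming a check_* callback, which the execution path calls through c->check_perm before the handler runs. A user-space model of that decode-time registration plus pre-execute check; the context fields and names are placeholders, and the example check uses the architectural rule that RDTSC faults at CPL>0 when CR4.TSD is set:

#include <stdio.h>

#define EMU_CONTINUE 0
#define EMU_FAULT    1

struct ctxt { int cpl; int cr4_tsd; };    /* only what the sketch needs */

/* check_* style hook: rdtsc is privileged when CR4.TSD is set. */
static int check_rdtsc_like(struct ctxt *c)
{
    return (c->cr4_tsd && c->cpl) ? EMU_FAULT : EMU_CONTINUE;
}

static int em_rdtsc_like(struct ctxt *c) { (void)c; puts("rdtsc"); return EMU_CONTINUE; }

struct desc {
    int (*check_perm)(struct ctxt *);
    int (*execute)(struct ctxt *);
};

static int run(struct ctxt *c, const struct desc *d)
{
    if (d->check_perm) {
        int rc = d->check_perm(c);        /* instruction specific permission check */
        if (rc != EMU_CONTINUE)
            return rc;
    }
    return d->execute(c);
}

int main(void)
{
    struct ctxt c = { .cpl = 3, .cr4_tsd = 1 };
    const struct desc rdtsc_desc = { check_rdtsc_like, em_rdtsc_like };
    printf("rc = %d\n", run(&c, &rdtsc_desc));   /* faults: CPL 3 with CR4.TSD */
    return 0;
}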
@@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = { | |||
2564 | #undef G | 3303 | #undef G |
2565 | #undef GD | 3304 | #undef GD |
2566 | #undef I | 3305 | #undef I |
3306 | #undef GP | ||
3307 | #undef EXT | ||
2567 | 3308 | ||
2568 | #undef D2bv | 3309 | #undef D2bv |
3310 | #undef D2bvIP | ||
2569 | #undef I2bv | 3311 | #undef I2bv |
2570 | #undef D6ALU | 3312 | #undef I6ALU |
2571 | 3313 | ||
2572 | static unsigned imm_size(struct decode_cache *c) | 3314 | static unsigned imm_size(struct decode_cache *c) |
2573 | { | 3315 | { |
@@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2625 | struct decode_cache *c = &ctxt->decode; | 3367 | struct decode_cache *c = &ctxt->decode; |
2626 | int rc = X86EMUL_CONTINUE; | 3368 | int rc = X86EMUL_CONTINUE; |
2627 | int mode = ctxt->mode; | 3369 | int mode = ctxt->mode; |
2628 | int def_op_bytes, def_ad_bytes, dual, goffset; | 3370 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; |
2629 | struct opcode opcode, *g_mod012, *g_mod3; | 3371 | bool op_prefix = false; |
3372 | struct opcode opcode; | ||
2630 | struct operand memop = { .type = OP_NONE }; | 3373 | struct operand memop = { .type = OP_NONE }; |
2631 | 3374 | ||
2632 | c->eip = ctxt->eip; | 3375 | c->eip = ctxt->eip; |
@@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2634 | c->fetch.end = c->fetch.start + insn_len; | 3377 | c->fetch.end = c->fetch.start + insn_len; |
2635 | if (insn_len > 0) | 3378 | if (insn_len > 0) |
2636 | memcpy(c->fetch.data, insn, insn_len); | 3379 | memcpy(c->fetch.data, insn, insn_len); |
2637 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); | ||
2638 | 3380 | ||
2639 | switch (mode) { | 3381 | switch (mode) { |
2640 | case X86EMUL_MODE_REAL: | 3382 | case X86EMUL_MODE_REAL: |
@@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2662 | for (;;) { | 3404 | for (;;) { |
2663 | switch (c->b = insn_fetch(u8, 1, c->eip)) { | 3405 | switch (c->b = insn_fetch(u8, 1, c->eip)) { |
2664 | case 0x66: /* operand-size override */ | 3406 | case 0x66: /* operand-size override */ |
3407 | op_prefix = true; | ||
2665 | /* switch between 2/4 bytes */ | 3408 | /* switch between 2/4 bytes */ |
2666 | c->op_bytes = def_op_bytes ^ 6; | 3409 | c->op_bytes = def_op_bytes ^ 6; |
2667 | break; | 3410 | break; |
@@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2692 | c->lock_prefix = 1; | 3435 | c->lock_prefix = 1; |
2693 | break; | 3436 | break; |
2694 | case 0xf2: /* REPNE/REPNZ */ | 3437 | case 0xf2: /* REPNE/REPNZ */ |
2695 | c->rep_prefix = REPNE_PREFIX; | ||
2696 | break; | ||
2697 | case 0xf3: /* REP/REPE/REPZ */ | 3438 | case 0xf3: /* REP/REPE/REPZ */ |
2698 | c->rep_prefix = REPE_PREFIX; | 3439 | c->rep_prefix = c->b; |
2699 | break; | 3440 | break; |
2700 | default: | 3441 | default: |
2701 | goto done_prefixes; | 3442 | goto done_prefixes; |
@@ -2722,29 +3463,49 @@ done_prefixes: | |||
2722 | } | 3463 | } |
2723 | c->d = opcode.flags; | 3464 | c->d = opcode.flags; |
2724 | 3465 | ||
2725 | if (c->d & Group) { | 3466 | while (c->d & GroupMask) { |
2726 | dual = c->d & GroupDual; | 3467 | switch (c->d & GroupMask) { |
2727 | c->modrm = insn_fetch(u8, 1, c->eip); | 3468 | case Group: |
2728 | --c->eip; | 3469 | c->modrm = insn_fetch(u8, 1, c->eip); |
2729 | 3470 | --c->eip; | |
2730 | if (c->d & GroupDual) { | 3471 | goffset = (c->modrm >> 3) & 7; |
2731 | g_mod012 = opcode.u.gdual->mod012; | 3472 | opcode = opcode.u.group[goffset]; |
2732 | g_mod3 = opcode.u.gdual->mod3; | 3473 | break; |
2733 | } else | 3474 | case GroupDual: |
2734 | g_mod012 = g_mod3 = opcode.u.group; | 3475 | c->modrm = insn_fetch(u8, 1, c->eip); |
2735 | 3476 | --c->eip; | |
2736 | c->d &= ~(Group | GroupDual); | 3477 | goffset = (c->modrm >> 3) & 7; |
2737 | 3478 | if ((c->modrm >> 6) == 3) | |
2738 | goffset = (c->modrm >> 3) & 7; | 3479 | opcode = opcode.u.gdual->mod3[goffset]; |
3480 | else | ||
3481 | opcode = opcode.u.gdual->mod012[goffset]; | ||
3482 | break; | ||
3483 | case RMExt: | ||
3484 | goffset = c->modrm & 7; | ||
3485 | opcode = opcode.u.group[goffset]; | ||
3486 | break; | ||
3487 | case Prefix: | ||
3488 | if (c->rep_prefix && op_prefix) | ||
3489 | return X86EMUL_UNHANDLEABLE; | ||
3490 | simd_prefix = op_prefix ? 0x66 : c->rep_prefix; | ||
3491 | switch (simd_prefix) { | ||
3492 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; | ||
3493 | case 0x66: opcode = opcode.u.gprefix->pfx_66; break; | ||
3494 | case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; | ||
3495 | case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; | ||
3496 | } | ||
3497 | break; | ||
3498 | default: | ||
3499 | return X86EMUL_UNHANDLEABLE; | ||
3500 | } | ||
2739 | 3501 | ||
2740 | if ((c->modrm >> 6) == 3) | 3502 | c->d &= ~GroupMask; |
2741 | opcode = g_mod3[goffset]; | ||
2742 | else | ||
2743 | opcode = g_mod012[goffset]; | ||
2744 | c->d |= opcode.flags; | 3503 | c->d |= opcode.flags; |
2745 | } | 3504 | } |
2746 | 3505 | ||
2747 | c->execute = opcode.u.execute; | 3506 | c->execute = opcode.u.execute; |
3507 | c->check_perm = opcode.check_perm; | ||
3508 | c->intercept = opcode.intercept; | ||
2748 | 3509 | ||
2749 | /* Unrecognised? */ | 3510 | /* Unrecognised? */ |
2750 | if (c->d == 0 || (c->d & Undefined)) | 3511 | if (c->d == 0 || (c->d & Undefined)) |
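The old single-shot Group/GroupDual handling becomes a loop over GroupMask, so one descriptor can redirect repeatedly: Group and GroupDual index a sub-table by ModRM.reg (GroupDual additionally splits on mod==3), RMExt indexes by ModRM.rm, and Prefix picks a SIMD-prefix variant; each step ORs the new flags in and the loop continues until no group bits remain, after which the chosen execute, check_perm and intercept values are cached in the decode state. A compact stand-alone model of that resolution loop, with simplified node kinds in place of the real flag bits:

#include <stdio.h>

enum { GRP_NONE = 0, GRP_REG = 1, GRP_RM = 2 };   /* stand-ins for Group/RMExt */

struct node {
    int kind;                        /* which redirection, if any */
    const struct node *sub;          /* 8-entry sub-table when kind != GRP_NONE */
    const char *name;                /* leaf instruction name */
};

static const struct node rm_ext[8] = {
    [1] = { GRP_NONE, NULL, "vmcall-like" },
    [3] = { GRP_NONE, NULL, "vmmcall-like" },
};

static const struct node group[8] = {
    [0] = { GRP_RM, rm_ext, NULL },               /* reg==0 redirects again on rm */
    [4] = { GRP_NONE, NULL, "smsw-like" },
};

static const struct node top = { GRP_REG, group, NULL };

int main(void)
{
    unsigned char modrm = 0xc3;                   /* mod=3 reg=0 rm=3 */
    const struct node *n = &top;

    while (n->kind != GRP_NONE) {                 /* analogue of while (c->d & GroupMask) */
        int idx = (n->kind == GRP_REG) ? (modrm >> 3) & 7 : modrm & 7;
        n = &n->sub[idx];
    }
    puts(n->name ? n->name : "undefined");
    return 0;
}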
@@ -2763,6 +3524,9 @@ done_prefixes: | |||
2763 | c->op_bytes = 4; | 3524 | c->op_bytes = 4; |
2764 | } | 3525 | } |
2765 | 3526 | ||
3527 | if (c->d & Sse) | ||
3528 | c->op_bytes = 16; | ||
3529 | |||
2766 | /* ModRM and SIB bytes. */ | 3530 | /* ModRM and SIB bytes. */ |
2767 | if (c->d & ModRM) { | 3531 | if (c->d & ModRM) { |
2768 | rc = decode_modrm(ctxt, ops, &memop); | 3532 | rc = decode_modrm(ctxt, ops, &memop); |
@@ -2776,7 +3540,7 @@ done_prefixes: | |||
2776 | if (!c->has_seg_override) | 3540 | if (!c->has_seg_override) |
2777 | set_seg_override(c, VCPU_SREG_DS); | 3541 | set_seg_override(c, VCPU_SREG_DS); |
2778 | 3542 | ||
2779 | memop.addr.mem.seg = seg_override(ctxt, ops, c); | 3543 | memop.addr.mem.seg = seg_override(ctxt, c); |
2780 | 3544 | ||
2781 | if (memop.type == OP_MEM && c->ad_bytes != 8) | 3545 | if (memop.type == OP_MEM && c->ad_bytes != 8) |
2782 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; | 3546 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; |
@@ -2792,7 +3556,7 @@ done_prefixes: | |||
2792 | case SrcNone: | 3556 | case SrcNone: |
2793 | break; | 3557 | break; |
2794 | case SrcReg: | 3558 | case SrcReg: |
2795 | decode_register_operand(&c->src, c, 0); | 3559 | decode_register_operand(ctxt, &c->src, c, 0); |
2796 | break; | 3560 | break; |
2797 | case SrcMem16: | 3561 | case SrcMem16: |
2798 | memop.bytes = 2; | 3562 | memop.bytes = 2; |
@@ -2836,7 +3600,7 @@ done_prefixes: | |||
2836 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3600 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2837 | c->src.addr.mem.ea = | 3601 | c->src.addr.mem.ea = |
2838 | register_address(c, c->regs[VCPU_REGS_RSI]); | 3602 | register_address(c, c->regs[VCPU_REGS_RSI]); |
2839 | c->src.addr.mem.seg = seg_override(ctxt, ops, c), | 3603 | c->src.addr.mem.seg = seg_override(ctxt, c); |
2840 | c->src.val = 0; | 3604 | c->src.val = 0; |
2841 | break; | 3605 | break; |
2842 | case SrcImmFAddr: | 3606 | case SrcImmFAddr: |
@@ -2883,7 +3647,7 @@ done_prefixes: | |||
2883 | /* Decode and fetch the destination operand: register or memory. */ | 3647 | /* Decode and fetch the destination operand: register or memory. */ |
2884 | switch (c->d & DstMask) { | 3648 | switch (c->d & DstMask) { |
2885 | case DstReg: | 3649 | case DstReg: |
2886 | decode_register_operand(&c->dst, c, | 3650 | decode_register_operand(ctxt, &c->dst, c, |
2887 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 3651 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
2888 | break; | 3652 | break; |
2889 | case DstImmUByte: | 3653 | case DstImmUByte: |
@@ -2926,7 +3690,7 @@ done_prefixes: | |||
2926 | } | 3690 | } |
2927 | 3691 | ||
2928 | done: | 3692 | done: |
2929 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 3693 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
2930 | } | 3694 | } |
2931 | 3695 | ||
2932 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3696 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
@@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
2979 | goto done; | 3743 | goto done; |
2980 | } | 3744 | } |
2981 | 3745 | ||
3746 | if ((c->d & Sse) | ||
3747 | && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) | ||
3748 | || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { | ||
3749 | rc = emulate_ud(ctxt); | ||
3750 | goto done; | ||
3751 | } | ||
3752 | |||
3753 | if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { | ||
3754 | rc = emulate_nm(ctxt); | ||
3755 | goto done; | ||
3756 | } | ||
3757 | |||
3758 | if (unlikely(ctxt->guest_mode) && c->intercept) { | ||
3759 | rc = emulator_check_intercept(ctxt, c->intercept, | ||
3760 | X86_ICPT_PRE_EXCEPT); | ||
3761 | if (rc != X86EMUL_CONTINUE) | ||
3762 | goto done; | ||
3763 | } | ||
3764 | |||
2982 | /* Privileged instruction can be executed only in CPL=0 */ | 3765 | /* Privileged instruction can be executed only in CPL=0 */ |
2983 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 3766 | if ((c->d & Priv) && ops->cpl(ctxt)) { |
2984 | rc = emulate_gp(ctxt, 0); | 3767 | rc = emulate_gp(ctxt, 0); |
2985 | goto done; | 3768 | goto done; |
2986 | } | 3769 | } |
2987 | 3770 | ||
3771 | /* Instruction can only be executed in protected mode */ | ||
3772 | if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { | ||
3773 | rc = emulate_ud(ctxt); | ||
3774 | goto done; | ||
3775 | } | ||
3776 | |||
3777 | /* Do instruction specific permission checks */ | ||
3778 | if (c->check_perm) { | ||
3779 | rc = c->check_perm(ctxt); | ||
3780 | if (rc != X86EMUL_CONTINUE) | ||
3781 | goto done; | ||
3782 | } | ||
3783 | |||
3784 | if (unlikely(ctxt->guest_mode) && c->intercept) { | ||
3785 | rc = emulator_check_intercept(ctxt, c->intercept, | ||
3786 | X86_ICPT_POST_EXCEPT); | ||
3787 | if (rc != X86EMUL_CONTINUE) | ||
3788 | goto done; | ||
3789 | } | ||
3790 | |||
2988 | if (c->rep_prefix && (c->d & String)) { | 3791 | if (c->rep_prefix && (c->d & String)) { |
2989 | /* All REP prefixes have the same first termination condition */ | 3792 | /* All REP prefixes have the same first termination condition */ |
2990 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { | 3793 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
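The hunk above also adds the availability guard for Sse-tagged instructions: #UD when CR0.EM is set or CR4.OSFXSR is clear, #NM when CR0.TS is set, matching how hardware gates SSE execution. The same three-bit test as a self-contained sketch (bit positions are the architectural ones):

#include <stdio.h>
#include <stdint.h>

#define X86_CR0_EM      (1UL << 2)
#define X86_CR0_TS      (1UL << 3)
#define X86_CR4_OSFXSR  (1UL << 9)

enum fault { FAULT_NONE, FAULT_UD, FAULT_NM };

static enum fault sse_allowed(uint64_t cr0, uint64_t cr4)
{
    if ((cr0 & X86_CR0_EM) || !(cr4 & X86_CR4_OSFXSR))
        return FAULT_UD;        /* emulation forced or OS never enabled FXSR/SSE */
    if (cr0 & X86_CR0_TS)
        return FAULT_NM;        /* lazy FPU context switch pending */
    return FAULT_NONE;
}

int main(void)
{
    printf("%d\n", sse_allowed(X86_CR0_TS, X86_CR4_OSFXSR));   /* 2 = #NM */
    return 0;
}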
@@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
2994 | } | 3797 | } |
2995 | 3798 | ||
2996 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { | 3799 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { |
2997 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), | 3800 | rc = segmented_read(ctxt, c->src.addr.mem, |
2998 | c->src.valptr, c->src.bytes); | 3801 | c->src.valptr, c->src.bytes); |
2999 | if (rc != X86EMUL_CONTINUE) | 3802 | if (rc != X86EMUL_CONTINUE) |
3000 | goto done; | 3803 | goto done; |
3001 | c->src.orig_val64 = c->src.val64; | 3804 | c->src.orig_val64 = c->src.val64; |
3002 | } | 3805 | } |
3003 | 3806 | ||
3004 | if (c->src2.type == OP_MEM) { | 3807 | if (c->src2.type == OP_MEM) { |
3005 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), | 3808 | rc = segmented_read(ctxt, c->src2.addr.mem, |
3006 | &c->src2.val, c->src2.bytes); | 3809 | &c->src2.val, c->src2.bytes); |
3007 | if (rc != X86EMUL_CONTINUE) | 3810 | if (rc != X86EMUL_CONTINUE) |
3008 | goto done; | 3811 | goto done; |
3009 | } | 3812 | } |
@@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3014 | 3817 | ||
3015 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 3818 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
3016 | /* optimisation - avoid slow emulated read if Mov */ | 3819 | /* optimisation - avoid slow emulated read if Mov */ |
3017 | rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), | 3820 | rc = segmented_read(ctxt, c->dst.addr.mem, |
3018 | &c->dst.val, c->dst.bytes); | 3821 | &c->dst.val, c->dst.bytes); |
3019 | if (rc != X86EMUL_CONTINUE) | 3822 | if (rc != X86EMUL_CONTINUE) |
3020 | goto done; | 3823 | goto done; |
@@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3023 | 3826 | ||
3024 | special_insn: | 3827 | special_insn: |
3025 | 3828 | ||
3829 | if (unlikely(ctxt->guest_mode) && c->intercept) { | ||
3830 | rc = emulator_check_intercept(ctxt, c->intercept, | ||
3831 | X86_ICPT_POST_MEMACCESS); | ||
3832 | if (rc != X86EMUL_CONTINUE) | ||
3833 | goto done; | ||
3834 | } | ||
3835 | |||
3026 | if (c->execute) { | 3836 | if (c->execute) { |
3027 | rc = c->execute(ctxt); | 3837 | rc = c->execute(ctxt); |
3028 | if (rc != X86EMUL_CONTINUE) | 3838 | if (rc != X86EMUL_CONTINUE) |
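For a nested guest, emulator_check_intercept() is now consulted at three points in the run: before the exception-related checks (X86_ICPT_PRE_EXCEPT), after the privilege/mode/check_perm tests but before operand reads (X86_ICPT_POST_EXCEPT), and after operand fetch just before the handler (X86_ICPT_POST_MEMACCESS), so an L1 hypervisor that intercepts the instruction gets its exit at the architecturally correct stage. A toy model of the staged check:

#include <stdio.h>

enum icpt_stage { ICPT_PRE_EXCEPT, ICPT_POST_EXCEPT, ICPT_POST_MEMACCESS };

#define EMU_CONTINUE    0
#define EMU_INTERCEPTED 1

/* Pretend L1 asked to intercept this instruction at the POST_EXCEPT stage. */
static int check_intercept(int intercept_id, enum icpt_stage stage)
{
    (void)intercept_id;
    return stage == ICPT_POST_EXCEPT ? EMU_INTERCEPTED : EMU_CONTINUE;
}

static int emulate(int guest_mode, int intercept_id)
{
    if (guest_mode && intercept_id &&
        check_intercept(intercept_id, ICPT_PRE_EXCEPT))
        return EMU_INTERCEPTED;
    /* ... privilege, mode and check_perm tests would run here ... */
    if (guest_mode && intercept_id &&
        check_intercept(intercept_id, ICPT_POST_EXCEPT))
        return EMU_INTERCEPTED;
    /* ... operand reads would run here ... */
    if (guest_mode && intercept_id &&
        check_intercept(intercept_id, ICPT_POST_MEMACCESS))
        return EMU_INTERCEPTED;
    puts("handler runs");
    return EMU_CONTINUE;
}

int main(void)
{
    return emulate(1, 42) == EMU_INTERCEPTED ? 0 : 1;
}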
@@ -3034,75 +3844,33 @@ special_insn: | |||
3034 | goto twobyte_insn; | 3844 | goto twobyte_insn; |
3035 | 3845 | ||
3036 | switch (c->b) { | 3846 | switch (c->b) { |
3037 | case 0x00 ... 0x05: | ||
3038 | add: /* add */ | ||
3039 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | ||
3040 | break; | ||
3041 | case 0x06: /* push es */ | 3847 | case 0x06: /* push es */ |
3042 | emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); | 3848 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); |
3043 | break; | 3849 | break; |
3044 | case 0x07: /* pop es */ | 3850 | case 0x07: /* pop es */ |
3045 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 3851 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
3046 | break; | 3852 | break; |
3047 | case 0x08 ... 0x0d: | ||
3048 | or: /* or */ | ||
3049 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | ||
3050 | break; | ||
3051 | case 0x0e: /* push cs */ | 3853 | case 0x0e: /* push cs */ |
3052 | emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); | 3854 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); |
3053 | break; | ||
3054 | case 0x10 ... 0x15: | ||
3055 | adc: /* adc */ | ||
3056 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | ||
3057 | break; | 3855 | break; |
3058 | case 0x16: /* push ss */ | 3856 | case 0x16: /* push ss */ |
3059 | emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); | 3857 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); |
3060 | break; | 3858 | break; |
3061 | case 0x17: /* pop ss */ | 3859 | case 0x17: /* pop ss */ |
3062 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 3860 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
3063 | break; | 3861 | break; |
3064 | case 0x18 ... 0x1d: | ||
3065 | sbb: /* sbb */ | ||
3066 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | ||
3067 | break; | ||
3068 | case 0x1e: /* push ds */ | 3862 | case 0x1e: /* push ds */ |
3069 | emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); | 3863 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); |
3070 | break; | 3864 | break; |
3071 | case 0x1f: /* pop ds */ | 3865 | case 0x1f: /* pop ds */ |
3072 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 3866 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
3073 | break; | 3867 | break; |
3074 | case 0x20 ... 0x25: | ||
3075 | and: /* and */ | ||
3076 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | ||
3077 | break; | ||
3078 | case 0x28 ... 0x2d: | ||
3079 | sub: /* sub */ | ||
3080 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | ||
3081 | break; | ||
3082 | case 0x30 ... 0x35: | ||
3083 | xor: /* xor */ | ||
3084 | emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); | ||
3085 | break; | ||
3086 | case 0x38 ... 0x3d: | ||
3087 | cmp: /* cmp */ | ||
3088 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
3089 | break; | ||
3090 | case 0x40 ... 0x47: /* inc r16/r32 */ | 3868 | case 0x40 ... 0x47: /* inc r16/r32 */ |
3091 | emulate_1op("inc", c->dst, ctxt->eflags); | 3869 | emulate_1op("inc", c->dst, ctxt->eflags); |
3092 | break; | 3870 | break; |
3093 | case 0x48 ... 0x4f: /* dec r16/r32 */ | 3871 | case 0x48 ... 0x4f: /* dec r16/r32 */ |
3094 | emulate_1op("dec", c->dst, ctxt->eflags); | 3872 | emulate_1op("dec", c->dst, ctxt->eflags); |
3095 | break; | 3873 | break; |
3096 | case 0x58 ... 0x5f: /* pop reg */ | ||
3097 | pop_instruction: | ||
3098 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | ||
3099 | break; | ||
3100 | case 0x60: /* pusha */ | ||
3101 | rc = emulate_pusha(ctxt, ops); | ||
3102 | break; | ||
3103 | case 0x61: /* popa */ | ||
3104 | rc = emulate_popa(ctxt, ops); | ||
3105 | break; | ||
3106 | case 0x63: /* movsxd */ | 3874 | case 0x63: /* movsxd */ |
3107 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 3875 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
3108 | goto cannot_emulate; | 3876 | goto cannot_emulate; |
@@ -3121,26 +3889,6 @@ special_insn: | |||
3121 | if (test_cc(c->b, ctxt->eflags)) | 3889 | if (test_cc(c->b, ctxt->eflags)) |
3122 | jmp_rel(c, c->src.val); | 3890 | jmp_rel(c, c->src.val); |
3123 | break; | 3891 | break; |
3124 | case 0x80 ... 0x83: /* Grp1 */ | ||
3125 | switch (c->modrm_reg) { | ||
3126 | case 0: | ||
3127 | goto add; | ||
3128 | case 1: | ||
3129 | goto or; | ||
3130 | case 2: | ||
3131 | goto adc; | ||
3132 | case 3: | ||
3133 | goto sbb; | ||
3134 | case 4: | ||
3135 | goto and; | ||
3136 | case 5: | ||
3137 | goto sub; | ||
3138 | case 6: | ||
3139 | goto xor; | ||
3140 | case 7: | ||
3141 | goto cmp; | ||
3142 | } | ||
3143 | break; | ||
3144 | case 0x84 ... 0x85: | 3892 | case 0x84 ... 0x85: |
3145 | test: | 3893 | test: |
3146 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 3894 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
@@ -3162,7 +3910,7 @@ special_insn: | |||
3162 | rc = emulate_ud(ctxt); | 3910 | rc = emulate_ud(ctxt); |
3163 | goto done; | 3911 | goto done; |
3164 | } | 3912 | } |
3165 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); | 3913 | c->dst.val = get_segment_selector(ctxt, c->modrm_reg); |
3166 | break; | 3914 | break; |
3167 | case 0x8d: /* lea r16/r32, m */ | 3915 | case 0x8d: /* lea r16/r32, m */ |
3168 | c->dst.val = c->src.addr.mem.ea; | 3916 | c->dst.val = c->src.addr.mem.ea; |
@@ -3187,7 +3935,7 @@ special_insn: | |||
3187 | break; | 3935 | break; |
3188 | } | 3936 | } |
3189 | case 0x8f: /* pop (sole member of Grp1a) */ | 3937 | case 0x8f: /* pop (sole member of Grp1a) */ |
3190 | rc = emulate_grp1a(ctxt, ops); | 3938 | rc = em_grp1a(ctxt); |
3191 | break; | 3939 | break; |
3192 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 3940 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
3193 | if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) | 3941 | if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) |
@@ -3200,31 +3948,17 @@ special_insn: | |||
3200 | case 8: c->dst.val = (s32)c->dst.val; break; | 3948 | case 8: c->dst.val = (s32)c->dst.val; break; |
3201 | } | 3949 | } |
3202 | break; | 3950 | break; |
3203 | case 0x9c: /* pushf */ | ||
3204 | c->src.val = (unsigned long) ctxt->eflags; | ||
3205 | emulate_push(ctxt, ops); | ||
3206 | break; | ||
3207 | case 0x9d: /* popf */ | ||
3208 | c->dst.type = OP_REG; | ||
3209 | c->dst.addr.reg = &ctxt->eflags; | ||
3210 | c->dst.bytes = c->op_bytes; | ||
3211 | rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); | ||
3212 | break; | ||
3213 | case 0xa6 ... 0xa7: /* cmps */ | ||
3214 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
3215 | goto cmp; | ||
3216 | case 0xa8 ... 0xa9: /* test ax, imm */ | 3951 | case 0xa8 ... 0xa9: /* test ax, imm */ |
3217 | goto test; | 3952 | goto test; |
3218 | case 0xae ... 0xaf: /* scas */ | ||
3219 | goto cmp; | ||
3220 | case 0xc0 ... 0xc1: | 3953 | case 0xc0 ... 0xc1: |
3221 | emulate_grp2(ctxt); | 3954 | rc = em_grp2(ctxt); |
3222 | break; | 3955 | break; |
3223 | case 0xc3: /* ret */ | 3956 | case 0xc3: /* ret */ |
3224 | c->dst.type = OP_REG; | 3957 | c->dst.type = OP_REG; |
3225 | c->dst.addr.reg = &c->eip; | 3958 | c->dst.addr.reg = &c->eip; |
3226 | c->dst.bytes = c->op_bytes; | 3959 | c->dst.bytes = c->op_bytes; |
3227 | goto pop_instruction; | 3960 | rc = em_pop(ctxt); |
3961 | break; | ||
3228 | case 0xc4: /* les */ | 3962 | case 0xc4: /* les */ |
3229 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); | 3963 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); |
3230 | break; | 3964 | break; |
@@ -3252,11 +3986,11 @@ special_insn: | |||
3252 | rc = emulate_iret(ctxt, ops); | 3986 | rc = emulate_iret(ctxt, ops); |
3253 | break; | 3987 | break; |
3254 | case 0xd0 ... 0xd1: /* Grp2 */ | 3988 | case 0xd0 ... 0xd1: /* Grp2 */ |
3255 | emulate_grp2(ctxt); | 3989 | rc = em_grp2(ctxt); |
3256 | break; | 3990 | break; |
3257 | case 0xd2 ... 0xd3: /* Grp2 */ | 3991 | case 0xd2 ... 0xd3: /* Grp2 */ |
3258 | c->src.val = c->regs[VCPU_REGS_RCX]; | 3992 | c->src.val = c->regs[VCPU_REGS_RCX]; |
3259 | emulate_grp2(ctxt); | 3993 | rc = em_grp2(ctxt); |
3260 | break; | 3994 | break; |
3261 | case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ | 3995 | case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ |
3262 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | 3996 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); |
@@ -3278,23 +4012,14 @@ special_insn: | |||
3278 | long int rel = c->src.val; | 4012 | long int rel = c->src.val; |
3279 | c->src.val = (unsigned long) c->eip; | 4013 | c->src.val = (unsigned long) c->eip; |
3280 | jmp_rel(c, rel); | 4014 | jmp_rel(c, rel); |
3281 | emulate_push(ctxt, ops); | 4015 | rc = em_push(ctxt); |
3282 | break; | 4016 | break; |
3283 | } | 4017 | } |
3284 | case 0xe9: /* jmp rel */ | 4018 | case 0xe9: /* jmp rel */ |
3285 | goto jmp; | 4019 | goto jmp; |
3286 | case 0xea: { /* jmp far */ | 4020 | case 0xea: /* jmp far */ |
3287 | unsigned short sel; | 4021 | rc = em_jmp_far(ctxt); |
3288 | jump_far: | ||
3289 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | ||
3290 | |||
3291 | if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) | ||
3292 | goto done; | ||
3293 | |||
3294 | c->eip = 0; | ||
3295 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
3296 | break; | 4022 | break; |
3297 | } | ||
3298 | case 0xeb: | 4023 | case 0xeb: |
3299 | jmp: /* jmp rel short */ | 4024 | jmp: /* jmp rel short */ |
3300 | jmp_rel(c, c->src.val); | 4025 | jmp_rel(c, c->src.val); |
@@ -3304,11 +4029,6 @@ special_insn: | |||
3304 | case 0xed: /* in (e/r)ax,dx */ | 4029 | case 0xed: /* in (e/r)ax,dx */ |
3305 | c->src.val = c->regs[VCPU_REGS_RDX]; | 4030 | c->src.val = c->regs[VCPU_REGS_RDX]; |
3306 | do_io_in: | 4031 | do_io_in: |
3307 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
3308 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
3309 | rc = emulate_gp(ctxt, 0); | ||
3310 | goto done; | ||
3311 | } | ||
3312 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 4032 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
3313 | &c->dst.val)) | 4033 | &c->dst.val)) |
3314 | goto done; /* IO is needed */ | 4034 | goto done; /* IO is needed */ |
@@ -3317,25 +4037,19 @@ special_insn: | |||
3317 | case 0xef: /* out dx,(e/r)ax */ | 4037 | case 0xef: /* out dx,(e/r)ax */ |
3318 | c->dst.val = c->regs[VCPU_REGS_RDX]; | 4038 | c->dst.val = c->regs[VCPU_REGS_RDX]; |
3319 | do_io_out: | 4039 | do_io_out: |
3320 | c->src.bytes = min(c->src.bytes, 4u); | 4040 | ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, |
3321 | if (!emulator_io_permited(ctxt, ops, c->dst.val, | 4041 | &c->src.val, 1); |
3322 | c->src.bytes)) { | ||
3323 | rc = emulate_gp(ctxt, 0); | ||
3324 | goto done; | ||
3325 | } | ||
3326 | ops->pio_out_emulated(c->src.bytes, c->dst.val, | ||
3327 | &c->src.val, 1, ctxt->vcpu); | ||
3328 | c->dst.type = OP_NONE; /* Disable writeback. */ | 4042 | c->dst.type = OP_NONE; /* Disable writeback. */ |
3329 | break; | 4043 | break; |
3330 | case 0xf4: /* hlt */ | 4044 | case 0xf4: /* hlt */ |
3331 | ctxt->vcpu->arch.halt_request = 1; | 4045 | ctxt->ops->halt(ctxt); |
3332 | break; | 4046 | break; |
3333 | case 0xf5: /* cmc */ | 4047 | case 0xf5: /* cmc */ |
3334 | /* complement carry flag from eflags reg */ | 4048 | /* complement carry flag from eflags reg */ |
3335 | ctxt->eflags ^= EFLG_CF; | 4049 | ctxt->eflags ^= EFLG_CF; |
3336 | break; | 4050 | break; |
3337 | case 0xf6 ... 0xf7: /* Grp3 */ | 4051 | case 0xf6 ... 0xf7: /* Grp3 */ |
3338 | rc = emulate_grp3(ctxt, ops); | 4052 | rc = em_grp3(ctxt); |
3339 | break; | 4053 | break; |
3340 | case 0xf8: /* clc */ | 4054 | case 0xf8: /* clc */ |
3341 | ctxt->eflags &= ~EFLG_CF; | 4055 | ctxt->eflags &= ~EFLG_CF; |
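The execution cases are also being weaned off ctxt->vcpu: hlt goes through ctxt->ops->halt(ctxt), pio_out_emulated and the cr/dr/msr accessors take the emulation context as their first argument, and the in/out port-permission tests that used to sit inline moved to the table's check_perm_in/out hooks. The underlying pattern is an ops vtable that keeps emulate.c ignorant of KVM/vcpu internals; a minimal sketch with a much-reduced, invented ops structure (the real x86_emulate_ops has many more callbacks):

#include <stdio.h>

struct emul_ctxt;

struct emul_ops {
    void (*halt)(struct emul_ctxt *ctxt);
    unsigned long (*get_cr)(struct emul_ctxt *ctxt, int cr);
};

struct emul_ctxt {
    const struct emul_ops *ops;
    void *backend_private;          /* e.g. the vcpu, opaque to the emulator */
};

/* Backend (the KVM side) supplies the callbacks. */
static void backend_halt(struct emul_ctxt *ctxt) { (void)ctxt; puts("halt requested"); }
static unsigned long backend_get_cr(struct emul_ctxt *ctxt, int cr)
{
    (void)ctxt; (void)cr;
    return 0x80050033UL;            /* illustrative CR0 value */
}

static const struct emul_ops ops = { backend_halt, backend_get_cr };

/* Emulator core only ever touches ctxt->ops. */
static void emulate_hlt(struct emul_ctxt *ctxt) { ctxt->ops->halt(ctxt); }

int main(void)
{
    struct emul_ctxt ctxt = { &ops, NULL };
    printf("cr0 = %#lx\n", ctxt.ops->get_cr(&ctxt, 0));
    emulate_hlt(&ctxt);
    return 0;
}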
@@ -3366,13 +4080,11 @@ special_insn: | |||
3366 | ctxt->eflags |= EFLG_DF; | 4080 | ctxt->eflags |= EFLG_DF; |
3367 | break; | 4081 | break; |
3368 | case 0xfe: /* Grp4 */ | 4082 | case 0xfe: /* Grp4 */ |
3369 | grp45: | 4083 | rc = em_grp45(ctxt); |
3370 | rc = emulate_grp45(ctxt, ops); | ||
3371 | break; | 4084 | break; |
3372 | case 0xff: /* Grp5 */ | 4085 | case 0xff: /* Grp5 */ |
3373 | if (c->modrm_reg == 5) | 4086 | rc = em_grp45(ctxt); |
3374 | goto jump_far; | 4087 | break; |
3375 | goto grp45; | ||
3376 | default: | 4088 | default: |
3377 | goto cannot_emulate; | 4089 | goto cannot_emulate; |
3378 | } | 4090 | } |
@@ -3381,7 +4093,7 @@ special_insn: | |||
3381 | goto done; | 4093 | goto done; |
3382 | 4094 | ||
3383 | writeback: | 4095 | writeback: |
3384 | rc = writeback(ctxt, ops); | 4096 | rc = writeback(ctxt); |
3385 | if (rc != X86EMUL_CONTINUE) | 4097 | if (rc != X86EMUL_CONTINUE) |
3386 | goto done; | 4098 | goto done; |
3387 | 4099 | ||
@@ -3392,7 +4104,7 @@ writeback: | |||
3392 | c->dst.type = saved_dst_type; | 4104 | c->dst.type = saved_dst_type; |
3393 | 4105 | ||
3394 | if ((c->d & SrcMask) == SrcSI) | 4106 | if ((c->d & SrcMask) == SrcSI) |
3395 | string_addr_inc(ctxt, seg_override(ctxt, ops, c), | 4107 | string_addr_inc(ctxt, seg_override(ctxt, c), |
3396 | VCPU_REGS_RSI, &c->src); | 4108 | VCPU_REGS_RSI, &c->src); |
3397 | 4109 | ||
3398 | if ((c->d & DstMask) == DstDI) | 4110 | if ((c->d & DstMask) == DstDI) |
@@ -3427,115 +4139,34 @@ writeback: | |||
3427 | done: | 4139 | done: |
3428 | if (rc == X86EMUL_PROPAGATE_FAULT) | 4140 | if (rc == X86EMUL_PROPAGATE_FAULT) |
3429 | ctxt->have_exception = true; | 4141 | ctxt->have_exception = true; |
4142 | if (rc == X86EMUL_INTERCEPTED) | ||
4143 | return EMULATION_INTERCEPTED; | ||
4144 | |||
3430 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 4145 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
3431 | 4146 | ||
3432 | twobyte_insn: | 4147 | twobyte_insn: |
3433 | switch (c->b) { | 4148 | switch (c->b) { |
3434 | case 0x01: /* lgdt, lidt, lmsw */ | ||
3435 | switch (c->modrm_reg) { | ||
3436 | u16 size; | ||
3437 | unsigned long address; | ||
3438 | |||
3439 | case 0: /* vmcall */ | ||
3440 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
3441 | goto cannot_emulate; | ||
3442 | |||
3443 | rc = kvm_fix_hypercall(ctxt->vcpu); | ||
3444 | if (rc != X86EMUL_CONTINUE) | ||
3445 | goto done; | ||
3446 | |||
3447 | /* Let the processor re-execute the fixed hypercall */ | ||
3448 | c->eip = ctxt->eip; | ||
3449 | /* Disable writeback. */ | ||
3450 | c->dst.type = OP_NONE; | ||
3451 | break; | ||
3452 | case 2: /* lgdt */ | ||
3453 | rc = read_descriptor(ctxt, ops, c->src.addr.mem, | ||
3454 | &size, &address, c->op_bytes); | ||
3455 | if (rc != X86EMUL_CONTINUE) | ||
3456 | goto done; | ||
3457 | realmode_lgdt(ctxt->vcpu, size, address); | ||
3458 | /* Disable writeback. */ | ||
3459 | c->dst.type = OP_NONE; | ||
3460 | break; | ||
3461 | case 3: /* lidt/vmmcall */ | ||
3462 | if (c->modrm_mod == 3) { | ||
3463 | switch (c->modrm_rm) { | ||
3464 | case 1: | ||
3465 | rc = kvm_fix_hypercall(ctxt->vcpu); | ||
3466 | break; | ||
3467 | default: | ||
3468 | goto cannot_emulate; | ||
3469 | } | ||
3470 | } else { | ||
3471 | rc = read_descriptor(ctxt, ops, c->src.addr.mem, | ||
3472 | &size, &address, | ||
3473 | c->op_bytes); | ||
3474 | if (rc != X86EMUL_CONTINUE) | ||
3475 | goto done; | ||
3476 | realmode_lidt(ctxt->vcpu, size, address); | ||
3477 | } | ||
3478 | /* Disable writeback. */ | ||
3479 | c->dst.type = OP_NONE; | ||
3480 | break; | ||
3481 | case 4: /* smsw */ | ||
3482 | c->dst.bytes = 2; | ||
3483 | c->dst.val = ops->get_cr(0, ctxt->vcpu); | ||
3484 | break; | ||
3485 | case 6: /* lmsw */ | ||
3486 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | | ||
3487 | (c->src.val & 0x0f), ctxt->vcpu); | ||
3488 | c->dst.type = OP_NONE; | ||
3489 | break; | ||
3490 | case 5: /* not defined */ | ||
3491 | emulate_ud(ctxt); | ||
3492 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3493 | goto done; | ||
3494 | case 7: /* invlpg*/ | ||
3495 | emulate_invlpg(ctxt->vcpu, | ||
3496 | linear(ctxt, c->src.addr.mem)); | ||
3497 | /* Disable writeback. */ | ||
3498 | c->dst.type = OP_NONE; | ||
3499 | break; | ||
3500 | default: | ||
3501 | goto cannot_emulate; | ||
3502 | } | ||
3503 | break; | ||
3504 | case 0x05: /* syscall */ | 4149 | case 0x05: /* syscall */ |
3505 | rc = emulate_syscall(ctxt, ops); | 4150 | rc = emulate_syscall(ctxt, ops); |
3506 | break; | 4151 | break; |
3507 | case 0x06: | 4152 | case 0x06: |
3508 | emulate_clts(ctxt->vcpu); | 4153 | rc = em_clts(ctxt); |
3509 | break; | 4154 | break; |
3510 | case 0x09: /* wbinvd */ | 4155 | case 0x09: /* wbinvd */ |
3511 | kvm_emulate_wbinvd(ctxt->vcpu); | 4156 | (ctxt->ops->wbinvd)(ctxt); |
3512 | break; | 4157 | break; |
3513 | case 0x08: /* invd */ | 4158 | case 0x08: /* invd */ |
3514 | case 0x0d: /* GrpP (prefetch) */ | 4159 | case 0x0d: /* GrpP (prefetch) */ |
3515 | case 0x18: /* Grp16 (prefetch/nop) */ | 4160 | case 0x18: /* Grp16 (prefetch/nop) */ |
3516 | break; | 4161 | break; |
3517 | case 0x20: /* mov cr, reg */ | 4162 | case 0x20: /* mov cr, reg */ |
3518 | switch (c->modrm_reg) { | 4163 | c->dst.val = ops->get_cr(ctxt, c->modrm_reg); |
3519 | case 1: | ||
3520 | case 5 ... 7: | ||
3521 | case 9 ... 15: | ||
3522 | emulate_ud(ctxt); | ||
3523 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3524 | goto done; | ||
3525 | } | ||
3526 | c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
3527 | break; | 4164 | break; |
3528 | case 0x21: /* mov from dr to reg */ | 4165 | case 0x21: /* mov from dr to reg */ |
3529 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 4166 | ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); |
3530 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | ||
3531 | emulate_ud(ctxt); | ||
3532 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3533 | goto done; | ||
3534 | } | ||
3535 | ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); | ||
3536 | break; | 4167 | break; |
3537 | case 0x22: /* mov reg, cr */ | 4168 | case 0x22: /* mov reg, cr */ |
3538 | if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { | 4169 | if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { |
3539 | emulate_gp(ctxt, 0); | 4170 | emulate_gp(ctxt, 0); |
3540 | rc = X86EMUL_PROPAGATE_FAULT; | 4171 | rc = X86EMUL_PROPAGATE_FAULT; |
3541 | goto done; | 4172 | goto done; |
@@ -3543,16 +4174,9 @@ twobyte_insn: | |||
3543 | c->dst.type = OP_NONE; | 4174 | c->dst.type = OP_NONE; |
3544 | break; | 4175 | break; |
3545 | case 0x23: /* mov from reg to dr */ | 4176 | case 0x23: /* mov from reg to dr */ |
3546 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 4177 | if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & |
3547 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | ||
3548 | emulate_ud(ctxt); | ||
3549 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3550 | goto done; | ||
3551 | } | ||
3552 | |||
3553 | if (ops->set_dr(c->modrm_reg, c->src.val & | ||
3554 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | 4178 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? |
3555 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | 4179 | ~0ULL : ~0U)) < 0) { |
3556 | /* #UD condition is already handled by the code above */ | 4180 | /* #UD condition is already handled by the code above */ |
3557 | emulate_gp(ctxt, 0); | 4181 | emulate_gp(ctxt, 0); |
3558 | rc = X86EMUL_PROPAGATE_FAULT; | 4182 | rc = X86EMUL_PROPAGATE_FAULT; |
@@ -3565,7 +4189,7 @@ twobyte_insn: | |||
3565 | /* wrmsr */ | 4189 | /* wrmsr */ |
3566 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 4190 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
3567 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 4191 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
3568 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 4192 | if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { |
3569 | emulate_gp(ctxt, 0); | 4193 | emulate_gp(ctxt, 0); |
3570 | rc = X86EMUL_PROPAGATE_FAULT; | 4194 | rc = X86EMUL_PROPAGATE_FAULT; |
3571 | goto done; | 4195 | goto done; |
@@ -3574,7 +4198,7 @@ twobyte_insn: | |||
3574 | break; | 4198 | break; |
3575 | case 0x32: | 4199 | case 0x32: |
3576 | /* rdmsr */ | 4200 | /* rdmsr */ |
3577 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 4201 | if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { |
3578 | emulate_gp(ctxt, 0); | 4202 | emulate_gp(ctxt, 0); |
3579 | rc = X86EMUL_PROPAGATE_FAULT; | 4203 | rc = X86EMUL_PROPAGATE_FAULT; |
3580 | goto done; | 4204 | goto done; |
@@ -3603,7 +4227,7 @@ twobyte_insn: | |||
3603 | c->dst.val = test_cc(c->b, ctxt->eflags); | 4227 | c->dst.val = test_cc(c->b, ctxt->eflags); |
3604 | break; | 4228 | break; |
3605 | case 0xa0: /* push fs */ | 4229 | case 0xa0: /* push fs */ |
3606 | emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); | 4230 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); |
3607 | break; | 4231 | break; |
3608 | case 0xa1: /* pop fs */ | 4232 | case 0xa1: /* pop fs */ |
3609 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 4233 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
@@ -3620,7 +4244,7 @@ twobyte_insn: | |||
3620 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 4244 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
3621 | break; | 4245 | break; |
3622 | case 0xa8: /* push gs */ | 4246 | case 0xa8: /* push gs */ |
3623 | emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); | 4247 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); |
3624 | break; | 4248 | break; |
3625 | case 0xa9: /* pop gs */ | 4249 | case 0xa9: /* pop gs */ |
3626 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 4250 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
@@ -3727,7 +4351,7 @@ twobyte_insn: | |||
3727 | (u64) c->src.val; | 4351 | (u64) c->src.val; |
3728 | break; | 4352 | break; |
3729 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 4353 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
3730 | rc = emulate_grp9(ctxt, ops); | 4354 | rc = em_grp9(ctxt); |
3731 | break; | 4355 | break; |
3732 | default: | 4356 | default: |
3733 | goto cannot_emulate; | 4357 | goto cannot_emulate; |
@@ -3739,5 +4363,5 @@ twobyte_insn: | |||
3739 | goto writeback; | 4363 | goto writeback; |
3740 | 4364 | ||
3741 | cannot_emulate: | 4365 | cannot_emulate: |
3742 | return -1; | 4366 | return EMULATION_FAILED; |
3743 | } | 4367 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca0b48f..51a97426e791 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -33,7 +33,6 @@ struct kvm_kpit_state { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | struct kvm_pit { | 35 | struct kvm_pit { |
36 | unsigned long base_addresss; | ||
37 | struct kvm_io_device dev; | 36 | struct kvm_io_device dev; |
38 | struct kvm_io_device speaker_dev; | 37 | struct kvm_io_device speaker_dev; |
39 | struct kvm *kvm; | 38 | struct kvm *kvm; |
@@ -51,7 +50,6 @@ struct kvm_pit { | |||
51 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 | 50 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 |
52 | #define KVM_PIT_CHANNEL_MASK 0x3 | 51 | #define KVM_PIT_CHANNEL_MASK 0x3 |
53 | 52 | ||
54 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); | ||
55 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); | 53 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); |
56 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); | 54 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); |
57 | void kvm_free_pit(struct kvm *kvm); | 55 | void kvm_free_pit(struct kvm *kvm); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d149410..53e2d084bffb 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm); | |||
75 | void kvm_destroy_pic(struct kvm *kvm); | 75 | void kvm_destroy_pic(struct kvm *kvm); |
76 | int kvm_pic_read_irq(struct kvm *kvm); | 76 | int kvm_pic_read_irq(struct kvm *kvm); |
77 | void kvm_pic_update_irq(struct kvm_pic *s); | 77 | void kvm_pic_update_irq(struct kvm_pic *s); |
78 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | ||
79 | 78 | ||
80 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | 79 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) |
81 | { | 80 | { |
@@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | |||
100 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); | 99 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); |
101 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); | 100 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); |
102 | 101 | ||
103 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); | ||
104 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); | 102 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); |
105 | 103 | ||
106 | #endif | 104 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae7593ee7..28418054b880 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
1206 | 1206 | ||
1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, | 1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, |
1208 | struct kvm_mmu_page *sp, u64 *spte, | 1208 | struct kvm_mmu_page *sp, u64 *spte, |
1209 | const void *pte, unsigned long mmu_seq) | 1209 | const void *pte) |
1210 | { | 1210 | { |
1211 | WARN_ON(1); | 1211 | WARN_ON(1); |
1212 | } | 1212 | } |
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
3163 | } | 3163 | } |
3164 | 3164 | ||
3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
3166 | struct kvm_mmu_page *sp, | 3166 | struct kvm_mmu_page *sp, u64 *spte, |
3167 | u64 *spte, | 3167 | const void *new) |
3168 | const void *new, unsigned long mmu_seq) | ||
3169 | { | 3168 | { |
3170 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 3169 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
3171 | ++vcpu->kvm->stat.mmu_pde_zapped; | 3170 | ++vcpu->kvm->stat.mmu_pde_zapped; |
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3173 | } | 3172 | } |
3174 | 3173 | ||
3175 | ++vcpu->kvm->stat.mmu_pte_updated; | 3174 | ++vcpu->kvm->stat.mmu_pte_updated; |
3176 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); | 3175 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); |
3177 | } | 3176 | } |
3178 | 3177 | ||
3179 | static bool need_remote_flush(u64 old, u64 new) | 3178 | static bool need_remote_flush(u64 old, u64 new) |
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3229 | struct kvm_mmu_page *sp; | 3228 | struct kvm_mmu_page *sp; |
3230 | struct hlist_node *node; | 3229 | struct hlist_node *node; |
3231 | LIST_HEAD(invalid_list); | 3230 | LIST_HEAD(invalid_list); |
3232 | unsigned long mmu_seq; | ||
3233 | u64 entry, gentry, *spte; | 3231 | u64 entry, gentry, *spte; |
3234 | unsigned pte_size, page_offset, misaligned, quadrant, offset; | 3232 | unsigned pte_size, page_offset, misaligned, quadrant, offset; |
3235 | int level, npte, invlpg_counter, r, flooded = 0; | 3233 | int level, npte, invlpg_counter, r, flooded = 0; |
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3271 | break; | 3269 | break; |
3272 | } | 3270 | } |
3273 | 3271 | ||
3274 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
3275 | smp_rmb(); | ||
3276 | |||
3277 | spin_lock(&vcpu->kvm->mmu_lock); | 3272 | spin_lock(&vcpu->kvm->mmu_lock); |
3278 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | 3273 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) |
3279 | gentry = 0; | 3274 | gentry = 0; |
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3345 | if (gentry && | 3340 | if (gentry && |
3346 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3341 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3347 | & mask.word)) | 3342 | & mask.word)) |
3348 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, | 3343 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
3349 | mmu_seq); | ||
3350 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3344 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3351 | remote_flush = true; | 3345 | remote_flush = true; |
3352 | ++spte; | 3346 | ++spte; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c6397795d865..6c4dc010c4cb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | |||
78 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; | 78 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; |
79 | } | 79 | } |
80 | 80 | ||
81 | static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, | 81 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
82 | gfn_t table_gfn, unsigned index, | 82 | pt_element_t __user *ptep_user, unsigned index, |
83 | pt_element_t orig_pte, pt_element_t new_pte) | 83 | pt_element_t orig_pte, pt_element_t new_pte) |
84 | { | 84 | { |
85 | int npages; | ||
85 | pt_element_t ret; | 86 | pt_element_t ret; |
86 | pt_element_t *table; | 87 | pt_element_t *table; |
87 | struct page *page; | 88 | struct page *page; |
88 | 89 | ||
89 | page = gfn_to_page(kvm, table_gfn); | 90 | npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); |
91 | /* Check if the user is doing something meaningless. */ | ||
92 | if (unlikely(npages != 1)) | ||
93 | return -EFAULT; | ||
90 | 94 | ||
91 | table = kmap_atomic(page, KM_USER0); | 95 | table = kmap_atomic(page, KM_USER0); |
92 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | 96 | ret = CMPXCHG(&table[index], orig_pte, new_pte); |
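cmpxchg_gpte now takes the userspace address of the guest PTE and pins it with get_user_pages_fast() instead of looking the page up by gfn, then kmaps it and performs the compare-and-exchange; failing to pin exactly one page is reported as -EFAULT so the walker can abort cleanly. Outside the kernel the same update-only-if-unchanged idiom looks like the sketch below, with a GCC builtin standing in for the kernel's CMPXCHG macro:

#include <stdio.h>
#include <stdint.h>

#define PT_ACCESSED_MASK (1ULL << 5)   /* architectural Accessed bit */

typedef uint64_t pt_element_t;

/* Returns nonzero if the element still held 'orig' and was updated, 0 if
 * another walker changed it first and the caller must re-walk. */
static int cmpxchg_gpte_like(pt_element_t *ptep, pt_element_t orig, pt_element_t new)
{
    return __sync_bool_compare_and_swap(ptep, orig, new);
}

int main(void)
{
    pt_element_t gpte = 0x1000 | 1;                /* present, not yet accessed */
    if (cmpxchg_gpte_like(&gpte, 0x1000 | 1, 0x1000 | 1 | PT_ACCESSED_MASK))
        printf("accessed bit set: %#llx\n", (unsigned long long)gpte);
    else
        puts("lost the race, walk again");
    return 0;
}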
@@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
117 | gva_t addr, u32 access) | 121 | gva_t addr, u32 access) |
118 | { | 122 | { |
119 | pt_element_t pte; | 123 | pt_element_t pte; |
124 | pt_element_t __user *ptep_user; | ||
120 | gfn_t table_gfn; | 125 | gfn_t table_gfn; |
121 | unsigned index, pt_access, uninitialized_var(pte_access); | 126 | unsigned index, pt_access, uninitialized_var(pte_access); |
122 | gpa_t pte_gpa; | 127 | gpa_t pte_gpa; |
@@ -152,6 +157,9 @@ walk: | |||
152 | pt_access = ACC_ALL; | 157 | pt_access = ACC_ALL; |
153 | 158 | ||
154 | for (;;) { | 159 | for (;;) { |
160 | gfn_t real_gfn; | ||
161 | unsigned long host_addr; | ||
162 | |||
155 | index = PT_INDEX(addr, walker->level); | 163 | index = PT_INDEX(addr, walker->level); |
156 | 164 | ||
157 | table_gfn = gpte_to_gfn(pte); | 165 | table_gfn = gpte_to_gfn(pte); |
@@ -160,43 +168,64 @@ walk: | |||
160 | walker->table_gfn[walker->level - 1] = table_gfn; | 168 | walker->table_gfn[walker->level - 1] = table_gfn; |
161 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 169 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
162 | 170 | ||
163 | if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, | 171 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), |
164 | offset, sizeof(pte), | 172 | PFERR_USER_MASK|PFERR_WRITE_MASK); |
165 | PFERR_USER_MASK|PFERR_WRITE_MASK)) { | 173 | if (unlikely(real_gfn == UNMAPPED_GVA)) { |
174 | present = false; | ||
175 | break; | ||
176 | } | ||
177 | real_gfn = gpa_to_gfn(real_gfn); | ||
178 | |||
179 | host_addr = gfn_to_hva(vcpu->kvm, real_gfn); | ||
180 | if (unlikely(kvm_is_error_hva(host_addr))) { | ||
181 | present = false; | ||
182 | break; | ||
183 | } | ||
184 | |||
185 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); | ||
186 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { | ||
166 | present = false; | 187 | present = false; |
167 | break; | 188 | break; |
168 | } | 189 | } |
169 | 190 | ||
170 | trace_kvm_mmu_paging_element(pte, walker->level); | 191 | trace_kvm_mmu_paging_element(pte, walker->level); |
171 | 192 | ||
172 | if (!is_present_gpte(pte)) { | 193 | if (unlikely(!is_present_gpte(pte))) { |
173 | present = false; | 194 | present = false; |
174 | break; | 195 | break; |
175 | } | 196 | } |
176 | 197 | ||
177 | if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { | 198 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, |
199 | walker->level))) { | ||
178 | rsvd_fault = true; | 200 | rsvd_fault = true; |
179 | break; | 201 | break; |
180 | } | 202 | } |
181 | 203 | ||
182 | if (write_fault && !is_writable_pte(pte)) | 204 | if (unlikely(write_fault && !is_writable_pte(pte) |
183 | if (user_fault || is_write_protection(vcpu)) | 205 | && (user_fault || is_write_protection(vcpu)))) |
184 | eperm = true; | 206 | eperm = true; |
185 | 207 | ||
186 | if (user_fault && !(pte & PT_USER_MASK)) | 208 | if (unlikely(user_fault && !(pte & PT_USER_MASK))) |
187 | eperm = true; | 209 | eperm = true; |
188 | 210 | ||
189 | #if PTTYPE == 64 | 211 | #if PTTYPE == 64 |
190 | if (fetch_fault && (pte & PT64_NX_MASK)) | 212 | if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) |
191 | eperm = true; | 213 | eperm = true; |
192 | #endif | 214 | #endif |
193 | 215 | ||
194 | if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { | 216 | if (!eperm && !rsvd_fault |
217 | && unlikely(!(pte & PT_ACCESSED_MASK))) { | ||
218 | int ret; | ||
195 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 219 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
196 | sizeof(pte)); | 220 | sizeof(pte)); |
197 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, | 221 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, |
198 | index, pte, pte|PT_ACCESSED_MASK)) | 222 | pte, pte|PT_ACCESSED_MASK); |
223 | if (unlikely(ret < 0)) { | ||
224 | present = false; | ||
225 | break; | ||
226 | } else if (ret) | ||
199 | goto walk; | 227 | goto walk; |
228 | |||
200 | mark_page_dirty(vcpu->kvm, table_gfn); | 229 | mark_page_dirty(vcpu->kvm, table_gfn); |
201 | pte |= PT_ACCESSED_MASK; | 230 | pte |= PT_ACCESSED_MASK; |
202 | } | 231 | } |
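The walk itself no longer uses kvm_read_guest_page_mmu(): each level is put through mmu->translate_gpa() (so a nested translation applies), turned into a host virtual address with gfn_to_hva(), and the gpte is read with __copy_from_user(), which also yields the ptep_user pointer that cmpxchg_gpte needs. A hedged sketch of that address plumbing; the translation helpers here are trivial stand-ins backed by a local array, and the in-page offset handling of the real code is omitted:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define PAGE_SHIFT 12

typedef uint64_t gpa_t, gfn_t;
typedef uint64_t pt_element_t;

static pt_element_t guest_table[512];             /* one 4 KiB page of guest PTEs */

static gpa_t translate_gpa(gpa_t gpa) { return gpa; }      /* identity: no nesting */
static void *gfn_to_hva(gfn_t gfn)    { (void)gfn; return guest_table; }

static int read_gpte(gpa_t table_gpa, unsigned index, pt_element_t *pte,
                     pt_element_t **ptep_user)
{
    gpa_t real = translate_gpa(table_gpa);
    if (real == (gpa_t)-1)
        return -1;                                 /* UNMAPPED_GVA analogue */
    pt_element_t *hva = gfn_to_hva(real >> PAGE_SHIFT);
    *ptep_user = hva + index;
    memcpy(pte, *ptep_user, sizeof(*pte));         /* __copy_from_user analogue */
    return 0;
}

int main(void)
{
    pt_element_t pte, *ptep;
    guest_table[3] = 0x2003;                       /* fake gpte at index 3 */
    if (read_gpte(0, 3, &pte, &ptep) == 0)
        printf("gpte[3] = %#llx\n", (unsigned long long)pte);
    return 0;
}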
@@ -241,17 +270,21 @@ walk: | |||
241 | --walker->level; | 270 | --walker->level; |
242 | } | 271 | } |
243 | 272 | ||
244 | if (!present || eperm || rsvd_fault) | 273 | if (unlikely(!present || eperm || rsvd_fault)) |
245 | goto error; | 274 | goto error; |
246 | 275 | ||
247 | if (write_fault && !is_dirty_gpte(pte)) { | 276 | if (write_fault && unlikely(!is_dirty_gpte(pte))) { |
248 | bool ret; | 277 | int ret; |
249 | 278 | ||
250 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 279 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
251 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, | 280 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, |
252 | pte|PT_DIRTY_MASK); | 281 | pte, pte|PT_DIRTY_MASK); |
253 | if (ret) | 282 | if (unlikely(ret < 0)) { |
283 | present = false; | ||
284 | goto error; | ||
285 | } else if (ret) | ||
254 | goto walk; | 286 | goto walk; |
287 | |||
255 | mark_page_dirty(vcpu->kvm, table_gfn); | 288 | mark_page_dirty(vcpu->kvm, table_gfn); |
256 | pte |= PT_DIRTY_MASK; | 289 | pte |= PT_DIRTY_MASK; |
257 | walker->ptes[walker->level - 1] = pte; | 290 | walker->ptes[walker->level - 1] = pte; |
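
A side note on the accessed/dirty handling in this hunk: the walker now reaches the guest PTE through its host user mapping and flips the A/D bit with a compare-exchange, restarting the walk if the PTE changed underneath it. The userspace-style sketch below is not the kernel code; the names are hypothetical and plain GCC atomic builtins stand in for the kernel's cmpxchg on the user mapping. It only illustrates the return-value convention the new cmpxchg_gpte callers rely on.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pte_t;

#define PT_ACCESSED_MASK (1ULL << 5)	/* x86 PTE Accessed bit */

/* Convention mirrored from the hunk above: a negative return would mean
 * the mapping faulted (omitted here), a positive return means the PTE
 * changed and the caller restarts the walk, 0 means the bit was set. */
static int set_accessed_bit(pte_t *ptep_user, pte_t old)
{
	pte_t new_pte = old | PT_ACCESSED_MASK;

	if (__atomic_compare_exchange_n(ptep_user, &old, new_pte, 0,
					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		return 0;	/* updated in place */
	return 1;		/* raced with the guest: goto walk */
}

int main(void)
{
	pte_t pte = 0x1;	/* present, Accessed bit clear */
	int ret = set_accessed_bit(&pte, pte);

	printf("%d %#llx\n", ret, (unsigned long long)pte);	/* 0 0x21 */
	return 0;
}
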
@@ -325,7 +358,7 @@ no_present: | |||
325 | } | 358 | } |
326 | 359 | ||
327 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 360 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
328 | u64 *spte, const void *pte, unsigned long mmu_seq) | 361 | u64 *spte, const void *pte) |
329 | { | 362 | { |
330 | pt_element_t gpte; | 363 | pt_element_t gpte; |
331 | unsigned pte_access; | 364 | unsigned pte_access; |
@@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
342 | kvm_release_pfn_clean(pfn); | 375 | kvm_release_pfn_clean(pfn); |
343 | return; | 376 | return; |
344 | } | 377 | } |
345 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
346 | return; | ||
347 | 378 | ||
348 | /* | 379 | /* |
349 | * we call mmu_set_spte() with host_writable = true because that | 380 | * we call mmu_set_spte() with host_writable = true because that |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d583e47..506e4fe23adc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -63,6 +63,10 @@ MODULE_LICENSE("GPL"); | |||
63 | 63 | ||
64 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 64 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
65 | 65 | ||
66 | #define TSC_RATIO_RSVD 0xffffff0000000000ULL | ||
67 | #define TSC_RATIO_MIN 0x0000000000000001ULL | ||
68 | #define TSC_RATIO_MAX 0x000000ffffffffffULL | ||
69 | |||
66 | static bool erratum_383_found __read_mostly; | 70 | static bool erratum_383_found __read_mostly; |
67 | 71 | ||
68 | static const u32 host_save_user_msrs[] = { | 72 | static const u32 host_save_user_msrs[] = { |
@@ -93,14 +97,6 @@ struct nested_state { | |||
93 | /* A VMEXIT is required but not yet emulated */ | 97 | /* A VMEXIT is required but not yet emulated */ |
94 | bool exit_required; | 98 | bool exit_required; |
95 | 99 | ||
96 | /* | ||
97 | * If we vmexit during an instruction emulation we need this to restore | ||
98 | * the l1 guest rip after the emulation | ||
99 | */ | ||
100 | unsigned long vmexit_rip; | ||
101 | unsigned long vmexit_rsp; | ||
102 | unsigned long vmexit_rax; | ||
103 | |||
104 | /* cache for intercepts of the guest */ | 100 | /* cache for intercepts of the guest */ |
105 | u32 intercept_cr; | 101 | u32 intercept_cr; |
106 | u32 intercept_dr; | 102 | u32 intercept_dr; |
@@ -144,8 +140,13 @@ struct vcpu_svm { | |||
144 | unsigned int3_injected; | 140 | unsigned int3_injected; |
145 | unsigned long int3_rip; | 141 | unsigned long int3_rip; |
146 | u32 apf_reason; | 142 | u32 apf_reason; |
143 | |||
144 | u64 tsc_ratio; | ||
147 | }; | 145 | }; |
148 | 146 | ||
147 | static DEFINE_PER_CPU(u64, current_tsc_ratio); | ||
148 | #define TSC_RATIO_DEFAULT 0x0100000000ULL | ||
149 | |||
149 | #define MSR_INVALID 0xffffffffU | 150 | #define MSR_INVALID 0xffffffffU |
150 | 151 | ||
151 | static struct svm_direct_access_msrs { | 152 | static struct svm_direct_access_msrs { |
@@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); | |||
190 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 191 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
191 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 192 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
192 | bool has_error_code, u32 error_code); | 193 | bool has_error_code, u32 error_code); |
194 | static u64 __scale_tsc(u64 ratio, u64 tsc); | ||
193 | 195 | ||
194 | enum { | 196 | enum { |
195 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, | 197 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, |
@@ -376,7 +378,6 @@ struct svm_cpu_data { | |||
376 | }; | 378 | }; |
377 | 379 | ||
378 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); | 380 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); |
379 | static uint32_t svm_features; | ||
380 | 381 | ||
381 | struct svm_init_data { | 382 | struct svm_init_data { |
382 | int cpu; | 383 | int cpu; |
@@ -569,6 +570,10 @@ static int has_svm(void) | |||
569 | 570 | ||
570 | static void svm_hardware_disable(void *garbage) | 571 | static void svm_hardware_disable(void *garbage) |
571 | { | 572 | { |
573 | /* Make sure we clean up behind us */ | ||
574 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) | ||
575 | wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); | ||
576 | |||
572 | cpu_svm_disable(); | 577 | cpu_svm_disable(); |
573 | } | 578 | } |
574 | 579 | ||
@@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage) | |||
610 | 615 | ||
611 | wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); | 616 | wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); |
612 | 617 | ||
618 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { | ||
619 | wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); | ||
620 | __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; | ||
621 | } | ||
622 | |||
613 | svm_init_erratum_383(); | 623 | svm_init_erratum_383(); |
614 | 624 | ||
615 | return 0; | 625 | return 0; |
@@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void) | |||
791 | if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) | 801 | if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) |
792 | kvm_enable_efer_bits(EFER_FFXSR); | 802 | kvm_enable_efer_bits(EFER_FFXSR); |
793 | 803 | ||
804 | if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { | ||
805 | u64 max; | ||
806 | |||
807 | kvm_has_tsc_control = true; | ||
808 | |||
809 | /* | ||
810 | * Make sure the user can only configure tsc_khz values that | ||
811 | * fit into a signed integer. | ||
812 | * A min value does not need to be calculated because it will | ||
813 | * always be 1 on all machines, and a value of 0 is used to disable | ||
814 | * tsc-scaling for the vcpu. | ||
815 | */ | ||
816 | max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); | ||
817 | |||
818 | kvm_max_guest_tsc_khz = max; | ||
819 | } | ||
820 | |||
794 | if (nested) { | 821 | if (nested) { |
795 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); | 822 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); |
796 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); | 823 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); |
@@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void) | |||
802 | goto err; | 829 | goto err; |
803 | } | 830 | } |
804 | 831 | ||
805 | svm_features = cpuid_edx(SVM_CPUID_FUNC); | ||
806 | |||
807 | if (!boot_cpu_has(X86_FEATURE_NPT)) | 832 | if (!boot_cpu_has(X86_FEATURE_NPT)) |
808 | npt_enabled = false; | 833 | npt_enabled = false; |
809 | 834 | ||
@@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) | |||
854 | seg->base = 0; | 879 | seg->base = 0; |
855 | } | 880 | } |
856 | 881 | ||
882 | static u64 __scale_tsc(u64 ratio, u64 tsc) | ||
883 | { | ||
884 | u64 mult, frac, _tsc; | ||
885 | |||
886 | mult = ratio >> 32; | ||
887 | frac = ratio & ((1ULL << 32) - 1); | ||
888 | |||
889 | _tsc = tsc; | ||
890 | _tsc *= mult; | ||
891 | _tsc += (tsc >> 32) * frac; | ||
892 | _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; | ||
893 | |||
894 | return _tsc; | ||
895 | } | ||
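
For readers following the arithmetic: the ratio is an 8.32 fixed-point value (integer part in bits 39:32, fraction in bits 31:0), and the three-term sum above computes tsc * ratio / 2^32 without needing a 128-bit intermediate. A minimal standalone sketch under that assumption:

#include <stdint.h>
#include <stdio.h>

/* scaled = tsc * ratio / 2^32, split so no product overflows 64 bits */
static uint64_t scale_tsc(uint64_t ratio, uint64_t tsc)
{
	uint64_t mult = ratio >> 32;		/* integer part    */
	uint64_t frac = ratio & 0xffffffffULL;	/* fractional part */
	uint64_t res;

	res  = tsc * mult;
	res += (tsc >> 32) * frac;			/* high half * frac */
	res += ((tsc & 0xffffffffULL) * frac) >> 32;	/* low half * frac  */
	return res;
}

int main(void)
{
	uint64_t ratio = (1ULL << 32) | 0x80000000ULL;	/* 1.5 in 8.32 */

	printf("%llu\n", (unsigned long long)scale_tsc(ratio, 1000000ULL));
	/* prints 1500000 */
	return 0;
}
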
896 | |||
897 | static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) | ||
898 | { | ||
899 | struct vcpu_svm *svm = to_svm(vcpu); | ||
900 | u64 _tsc = tsc; | ||
901 | |||
902 | if (svm->tsc_ratio != TSC_RATIO_DEFAULT) | ||
903 | _tsc = __scale_tsc(svm->tsc_ratio, tsc); | ||
904 | |||
905 | return _tsc; | ||
906 | } | ||
907 | |||
908 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | ||
909 | { | ||
910 | struct vcpu_svm *svm = to_svm(vcpu); | ||
911 | u64 ratio; | ||
912 | u64 khz; | ||
913 | |||
914 | /* TSC scaling supported? */ | ||
915 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) | ||
916 | return; | ||
917 | |||
918 | /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ | ||
919 | if (user_tsc_khz == 0) { | ||
920 | vcpu->arch.virtual_tsc_khz = 0; | ||
921 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
922 | return; | ||
923 | } | ||
924 | |||
925 | khz = user_tsc_khz; | ||
926 | |||
927 | /* TSC scaling required - calculate ratio */ | ||
928 | ratio = khz << 32; | ||
929 | do_div(ratio, tsc_khz); | ||
930 | |||
931 | if (ratio == 0 || ratio & TSC_RATIO_RSVD) { | ||
932 | WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", | ||
933 | user_tsc_khz); | ||
934 | return; | ||
935 | } | ||
936 | vcpu->arch.virtual_tsc_khz = user_tsc_khz; | ||
937 | svm->tsc_ratio = ratio; | ||
938 | } | ||
939 | |||
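
The ratio itself is simply guest_khz / host_khz in the same fixed-point format; a result that would spill into the reserved bits is rejected and the vcpu keeps the default 1.0 ratio. A small sketch with a hypothetical helper, taking the host frequency as an explicit parameter instead of the kernel's tsc_khz:

#include <stdint.h>
#include <stdio.h>

#define TSC_RATIO_RSVD	0xffffff0000000000ULL

/* Returns the 8.32 ratio for MSR_AMD64_TSC_RATIO, or 0 if the requested
 * frequency cannot be represented (caller keeps TSC_RATIO_DEFAULT). */
static uint64_t tsc_ratio(uint32_t guest_khz, uint32_t host_khz)
{
	uint64_t ratio = ((uint64_t)guest_khz << 32) / host_khz;

	if (ratio == 0 || (ratio & TSC_RATIO_RSVD))
		return 0;
	return ratio;
}

int main(void)
{
	/* 1.6 GHz guest on a 3.2 GHz host: ratio 0.5, i.e. 0x80000000 */
	printf("%#llx\n", (unsigned long long)tsc_ratio(1600000, 3200000));
	return 0;
}
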
857 | static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 940 | static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
858 | { | 941 | { |
859 | struct vcpu_svm *svm = to_svm(vcpu); | 942 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | |||
880 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 963 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
881 | } | 964 | } |
882 | 965 | ||
966 | static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | ||
967 | { | ||
968 | u64 tsc; | ||
969 | |||
970 | tsc = svm_scale_tsc(vcpu, native_read_tsc()); | ||
971 | |||
972 | return target_tsc - tsc; | ||
973 | } | ||
974 | |||
883 | static void init_vmcb(struct vcpu_svm *svm) | 975 | static void init_vmcb(struct vcpu_svm *svm) |
884 | { | 976 | { |
885 | struct vmcb_control_area *control = &svm->vmcb->control; | 977 | struct vmcb_control_area *control = &svm->vmcb->control; |
@@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
975 | svm_set_efer(&svm->vcpu, 0); | 1067 | svm_set_efer(&svm->vcpu, 0); |
976 | save->dr6 = 0xffff0ff0; | 1068 | save->dr6 = 0xffff0ff0; |
977 | save->dr7 = 0x400; | 1069 | save->dr7 = 0x400; |
978 | save->rflags = 2; | 1070 | kvm_set_rflags(&svm->vcpu, 2); |
979 | save->rip = 0x0000fff0; | 1071 | save->rip = 0x0000fff0; |
980 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 1072 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
981 | 1073 | ||
@@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1048 | goto out; | 1140 | goto out; |
1049 | } | 1141 | } |
1050 | 1142 | ||
1143 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
1144 | |||
1051 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); | 1145 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); |
1052 | if (err) | 1146 | if (err) |
1053 | goto free_svm; | 1147 | goto free_svm; |
@@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1141 | 1235 | ||
1142 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1236 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1143 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1237 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1238 | |||
1239 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && | ||
1240 | svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { | ||
1241 | __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; | ||
1242 | wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); | ||
1243 | } | ||
1144 | } | 1244 | } |
1145 | 1245 | ||
1146 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) | 1246 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) |
@@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1365 | { | 1465 | { |
1366 | struct vcpu_svm *svm = to_svm(vcpu); | 1466 | struct vcpu_svm *svm = to_svm(vcpu); |
1367 | 1467 | ||
1368 | if (is_guest_mode(vcpu)) { | ||
1369 | /* | ||
1370 | * We are here because we run in nested mode, the host kvm | ||
1371 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
1372 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
1373 | * This needs to be checked here. | ||
1374 | */ | ||
1375 | unsigned long old, new; | ||
1376 | |||
1377 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
1378 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1379 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1380 | |||
1381 | if (old == new) { | ||
1382 | /* cr0 write with ts and mp unchanged */ | ||
1383 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
1384 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { | ||
1385 | svm->nested.vmexit_rip = kvm_rip_read(vcpu); | ||
1386 | svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
1387 | svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
1388 | return; | ||
1389 | } | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1393 | #ifdef CONFIG_X86_64 | 1468 | #ifdef CONFIG_X86_64 |
1394 | if (vcpu->arch.efer & EFER_LME) { | 1469 | if (vcpu->arch.efer & EFER_LME) { |
1395 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1470 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
@@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2127 | nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); | 2202 | nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); |
2128 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 2203 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
2129 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | 2204 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; |
2130 | nested_vmcb->save.rflags = vmcb->save.rflags; | 2205 | nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); |
2131 | nested_vmcb->save.rip = vmcb->save.rip; | 2206 | nested_vmcb->save.rip = vmcb->save.rip; |
2132 | nested_vmcb->save.rsp = vmcb->save.rsp; | 2207 | nested_vmcb->save.rsp = vmcb->save.rsp; |
2133 | nested_vmcb->save.rax = vmcb->save.rax; | 2208 | nested_vmcb->save.rax = vmcb->save.rax; |
@@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2184 | svm->vmcb->save.ds = hsave->save.ds; | 2259 | svm->vmcb->save.ds = hsave->save.ds; |
2185 | svm->vmcb->save.gdtr = hsave->save.gdtr; | 2260 | svm->vmcb->save.gdtr = hsave->save.gdtr; |
2186 | svm->vmcb->save.idtr = hsave->save.idtr; | 2261 | svm->vmcb->save.idtr = hsave->save.idtr; |
2187 | svm->vmcb->save.rflags = hsave->save.rflags; | 2262 | kvm_set_rflags(&svm->vcpu, hsave->save.rflags); |
2188 | svm_set_efer(&svm->vcpu, hsave->save.efer); | 2263 | svm_set_efer(&svm->vcpu, hsave->save.efer); |
2189 | svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); | 2264 | svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); |
2190 | svm_set_cr4(&svm->vcpu, hsave->save.cr4); | 2265 | svm_set_cr4(&svm->vcpu, hsave->save.cr4); |
@@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2312 | hsave->save.efer = svm->vcpu.arch.efer; | 2387 | hsave->save.efer = svm->vcpu.arch.efer; |
2313 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); | 2388 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); |
2314 | hsave->save.cr4 = svm->vcpu.arch.cr4; | 2389 | hsave->save.cr4 = svm->vcpu.arch.cr4; |
2315 | hsave->save.rflags = vmcb->save.rflags; | 2390 | hsave->save.rflags = kvm_get_rflags(&svm->vcpu); |
2316 | hsave->save.rip = kvm_rip_read(&svm->vcpu); | 2391 | hsave->save.rip = kvm_rip_read(&svm->vcpu); |
2317 | hsave->save.rsp = vmcb->save.rsp; | 2392 | hsave->save.rsp = vmcb->save.rsp; |
2318 | hsave->save.rax = vmcb->save.rax; | 2393 | hsave->save.rax = vmcb->save.rax; |
@@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2323 | 2398 | ||
2324 | copy_vmcb_control_area(hsave, vmcb); | 2399 | copy_vmcb_control_area(hsave, vmcb); |
2325 | 2400 | ||
2326 | if (svm->vmcb->save.rflags & X86_EFLAGS_IF) | 2401 | if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) |
2327 | svm->vcpu.arch.hflags |= HF_HIF_MASK; | 2402 | svm->vcpu.arch.hflags |= HF_HIF_MASK; |
2328 | else | 2403 | else |
2329 | svm->vcpu.arch.hflags &= ~HF_HIF_MASK; | 2404 | svm->vcpu.arch.hflags &= ~HF_HIF_MASK; |
@@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2341 | svm->vmcb->save.ds = nested_vmcb->save.ds; | 2416 | svm->vmcb->save.ds = nested_vmcb->save.ds; |
2342 | svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; | 2417 | svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; |
2343 | svm->vmcb->save.idtr = nested_vmcb->save.idtr; | 2418 | svm->vmcb->save.idtr = nested_vmcb->save.idtr; |
2344 | svm->vmcb->save.rflags = nested_vmcb->save.rflags; | 2419 | kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); |
2345 | svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); | 2420 | svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); |
2346 | svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); | 2421 | svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); |
2347 | svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); | 2422 | svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); |
@@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
2443 | if (nested_svm_check_permissions(svm)) | 2518 | if (nested_svm_check_permissions(svm)) |
2444 | return 1; | 2519 | return 1; |
2445 | 2520 | ||
2446 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2447 | skip_emulated_instruction(&svm->vcpu); | ||
2448 | |||
2449 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 2521 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
2450 | if (!nested_vmcb) | 2522 | if (!nested_vmcb) |
2451 | return 1; | 2523 | return 1; |
2452 | 2524 | ||
2525 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2526 | skip_emulated_instruction(&svm->vcpu); | ||
2527 | |||
2453 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2528 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
2454 | nested_svm_unmap(page); | 2529 | nested_svm_unmap(page); |
2455 | 2530 | ||
@@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
2464 | if (nested_svm_check_permissions(svm)) | 2539 | if (nested_svm_check_permissions(svm)) |
2465 | return 1; | 2540 | return 1; |
2466 | 2541 | ||
2467 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2468 | skip_emulated_instruction(&svm->vcpu); | ||
2469 | |||
2470 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 2542 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
2471 | if (!nested_vmcb) | 2543 | if (!nested_vmcb) |
2472 | return 1; | 2544 | return 1; |
2473 | 2545 | ||
2546 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2547 | skip_emulated_instruction(&svm->vcpu); | ||
2548 | |||
2474 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2549 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
2475 | nested_svm_unmap(page); | 2550 | nested_svm_unmap(page); |
2476 | 2551 | ||
@@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) | |||
2676 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | 2751 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
2677 | } | 2752 | } |
2678 | 2753 | ||
2754 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) | ||
2755 | { | ||
2756 | unsigned long cr0 = svm->vcpu.arch.cr0; | ||
2757 | bool ret = false; | ||
2758 | u64 intercept; | ||
2759 | |||
2760 | intercept = svm->nested.intercept; | ||
2761 | |||
2762 | if (!is_guest_mode(&svm->vcpu) || | ||
2763 | (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) | ||
2764 | return false; | ||
2765 | |||
2766 | cr0 &= ~SVM_CR0_SELECTIVE_MASK; | ||
2767 | val &= ~SVM_CR0_SELECTIVE_MASK; | ||
2768 | |||
2769 | if (cr0 ^ val) { | ||
2770 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
2771 | ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); | ||
2772 | } | ||
2773 | |||
2774 | return ret; | ||
2775 | } | ||
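
check_selective_cr0_intercepted() encodes the rule that CR0_SEL_WRITE is only relevant when a bit outside TS and MP changes. A minimal sketch of just that test, assuming SVM_CR0_SELECTIVE_MASK is (CR0.TS | CR0.MP):

#include <stdbool.h>
#include <stdio.h>

#define CR0_MP		(1UL << 1)
#define CR0_TS		(1UL << 3)
#define SEL_MASK	(CR0_TS | CR0_MP)	/* assumed selective mask */

/* True when a bit other than TS/MP changes, i.e. when the L1 selective
 * CR0 write intercept should be consulted. */
static bool selective_cr0_write(unsigned long old_cr0, unsigned long new_cr0)
{
	return ((old_cr0 ^ new_cr0) & ~SEL_MASK) != 0;
}

int main(void)
{
	unsigned long cr0 = 0x80050033UL;

	printf("%d\n", selective_cr0_write(cr0, cr0 | CR0_TS));	/* 0 */
	printf("%d\n", selective_cr0_write(cr0, cr0 & ~(1UL << 31)));	/* 1 */
	return 0;
}
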
2776 | |||
2679 | #define CR_VALID (1ULL << 63) | 2777 | #define CR_VALID (1ULL << 63) |
2680 | 2778 | ||
2681 | static int cr_interception(struct vcpu_svm *svm) | 2779 | static int cr_interception(struct vcpu_svm *svm) |
@@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm) | |||
2699 | val = kvm_register_read(&svm->vcpu, reg); | 2797 | val = kvm_register_read(&svm->vcpu, reg); |
2700 | switch (cr) { | 2798 | switch (cr) { |
2701 | case 0: | 2799 | case 0: |
2702 | err = kvm_set_cr0(&svm->vcpu, val); | 2800 | if (!check_selective_cr0_intercepted(svm, val)) |
2801 | err = kvm_set_cr0(&svm->vcpu, val); | ||
2802 | else | ||
2803 | return 1; | ||
2804 | |||
2703 | break; | 2805 | break; |
2704 | case 3: | 2806 | case 3: |
2705 | err = kvm_set_cr3(&svm->vcpu, val); | 2807 | err = kvm_set_cr3(&svm->vcpu, val); |
@@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm) | |||
2744 | return 1; | 2846 | return 1; |
2745 | } | 2847 | } |
2746 | 2848 | ||
2747 | static int cr0_write_interception(struct vcpu_svm *svm) | ||
2748 | { | ||
2749 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
2750 | int r; | ||
2751 | |||
2752 | r = cr_interception(svm); | ||
2753 | |||
2754 | if (svm->nested.vmexit_rip) { | ||
2755 | kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); | ||
2756 | kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); | ||
2757 | kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); | ||
2758 | svm->nested.vmexit_rip = 0; | ||
2759 | } | ||
2760 | |||
2761 | return r; | ||
2762 | } | ||
2763 | |||
2764 | static int dr_interception(struct vcpu_svm *svm) | 2849 | static int dr_interception(struct vcpu_svm *svm) |
2765 | { | 2850 | { |
2766 | int reg, dr; | 2851 | int reg, dr; |
@@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2813 | case MSR_IA32_TSC: { | 2898 | case MSR_IA32_TSC: { |
2814 | struct vmcb *vmcb = get_host_vmcb(svm); | 2899 | struct vmcb *vmcb = get_host_vmcb(svm); |
2815 | 2900 | ||
2816 | *data = vmcb->control.tsc_offset + native_read_tsc(); | 2901 | *data = vmcb->control.tsc_offset + |
2902 | svm_scale_tsc(vcpu, native_read_tsc()); | ||
2903 | |||
2817 | break; | 2904 | break; |
2818 | } | 2905 | } |
2819 | case MSR_STAR: | 2906 | case MSR_STAR: |
@@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3048 | [SVM_EXIT_READ_CR4] = cr_interception, | 3135 | [SVM_EXIT_READ_CR4] = cr_interception, |
3049 | [SVM_EXIT_READ_CR8] = cr_interception, | 3136 | [SVM_EXIT_READ_CR8] = cr_interception, |
3050 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 3137 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
3051 | [SVM_EXIT_WRITE_CR0] = cr0_write_interception, | 3138 | [SVM_EXIT_WRITE_CR0] = cr_interception, |
3052 | [SVM_EXIT_WRITE_CR3] = cr_interception, | 3139 | [SVM_EXIT_WRITE_CR3] = cr_interception, |
3053 | [SVM_EXIT_WRITE_CR4] = cr_interception, | 3140 | [SVM_EXIT_WRITE_CR4] = cr_interception, |
3054 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 3141 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
@@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3104 | [SVM_EXIT_NPF] = pf_interception, | 3191 | [SVM_EXIT_NPF] = pf_interception, |
3105 | }; | 3192 | }; |
3106 | 3193 | ||
3107 | void dump_vmcb(struct kvm_vcpu *vcpu) | 3194 | static void dump_vmcb(struct kvm_vcpu *vcpu) |
3108 | { | 3195 | { |
3109 | struct vcpu_svm *svm = to_svm(vcpu); | 3196 | struct vcpu_svm *svm = to_svm(vcpu); |
3110 | struct vmcb_control_area *control = &svm->vmcb->control; | 3197 | struct vmcb_control_area *control = &svm->vmcb->control; |
3111 | struct vmcb_save_area *save = &svm->vmcb->save; | 3198 | struct vmcb_save_area *save = &svm->vmcb->save; |
3112 | 3199 | ||
3113 | pr_err("VMCB Control Area:\n"); | 3200 | pr_err("VMCB Control Area:\n"); |
3114 | pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); | 3201 | pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); |
3115 | pr_err("cr_write: %04x\n", control->intercept_cr >> 16); | 3202 | pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); |
3116 | pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); | 3203 | pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); |
3117 | pr_err("dr_write: %04x\n", control->intercept_dr >> 16); | 3204 | pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); |
3118 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | 3205 | pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); |
3119 | pr_err("intercepts: %016llx\n", control->intercept); | 3206 | pr_err("%-20s%016llx\n", "intercepts:", control->intercept); |
3120 | pr_err("pause filter count: %d\n", control->pause_filter_count); | 3207 | pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); |
3121 | pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); | 3208 | pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); |
3122 | pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); | 3209 | pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); |
3123 | pr_err("tsc_offset: %016llx\n", control->tsc_offset); | 3210 | pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); |
3124 | pr_err("asid: %d\n", control->asid); | 3211 | pr_err("%-20s%d\n", "asid:", control->asid); |
3125 | pr_err("tlb_ctl: %d\n", control->tlb_ctl); | 3212 | pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); |
3126 | pr_err("int_ctl: %08x\n", control->int_ctl); | 3213 | pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); |
3127 | pr_err("int_vector: %08x\n", control->int_vector); | 3214 | pr_err("%-20s%08x\n", "int_vector:", control->int_vector); |
3128 | pr_err("int_state: %08x\n", control->int_state); | 3215 | pr_err("%-20s%08x\n", "int_state:", control->int_state); |
3129 | pr_err("exit_code: %08x\n", control->exit_code); | 3216 | pr_err("%-20s%08x\n", "exit_code:", control->exit_code); |
3130 | pr_err("exit_info1: %016llx\n", control->exit_info_1); | 3217 | pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); |
3131 | pr_err("exit_info2: %016llx\n", control->exit_info_2); | 3218 | pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); |
3132 | pr_err("exit_int_info: %08x\n", control->exit_int_info); | 3219 | pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); |
3133 | pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); | 3220 | pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); |
3134 | pr_err("nested_ctl: %lld\n", control->nested_ctl); | 3221 | pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); |
3135 | pr_err("nested_cr3: %016llx\n", control->nested_cr3); | 3222 | pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); |
3136 | pr_err("event_inj: %08x\n", control->event_inj); | 3223 | pr_err("%-20s%08x\n", "event_inj:", control->event_inj); |
3137 | pr_err("event_inj_err: %08x\n", control->event_inj_err); | 3224 | pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); |
3138 | pr_err("lbr_ctl: %lld\n", control->lbr_ctl); | 3225 | pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); |
3139 | pr_err("next_rip: %016llx\n", control->next_rip); | 3226 | pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); |
3140 | pr_err("VMCB State Save Area:\n"); | 3227 | pr_err("VMCB State Save Area:\n"); |
3141 | pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", | 3228 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3142 | save->es.selector, save->es.attrib, | 3229 | "es:", |
3143 | save->es.limit, save->es.base); | 3230 | save->es.selector, save->es.attrib, |
3144 | pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", | 3231 | save->es.limit, save->es.base); |
3145 | save->cs.selector, save->cs.attrib, | 3232 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3146 | save->cs.limit, save->cs.base); | 3233 | "cs:", |
3147 | pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", | 3234 | save->cs.selector, save->cs.attrib, |
3148 | save->ss.selector, save->ss.attrib, | 3235 | save->cs.limit, save->cs.base); |
3149 | save->ss.limit, save->ss.base); | 3236 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3150 | pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", | 3237 | "ss:", |
3151 | save->ds.selector, save->ds.attrib, | 3238 | save->ss.selector, save->ss.attrib, |
3152 | save->ds.limit, save->ds.base); | 3239 | save->ss.limit, save->ss.base); |
3153 | pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", | 3240 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3154 | save->fs.selector, save->fs.attrib, | 3241 | "ds:", |
3155 | save->fs.limit, save->fs.base); | 3242 | save->ds.selector, save->ds.attrib, |
3156 | pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", | 3243 | save->ds.limit, save->ds.base); |
3157 | save->gs.selector, save->gs.attrib, | 3244 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3158 | save->gs.limit, save->gs.base); | 3245 | "fs:", |
3159 | pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", | 3246 | save->fs.selector, save->fs.attrib, |
3160 | save->gdtr.selector, save->gdtr.attrib, | 3247 | save->fs.limit, save->fs.base); |
3161 | save->gdtr.limit, save->gdtr.base); | 3248 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3162 | pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", | 3249 | "gs:", |
3163 | save->ldtr.selector, save->ldtr.attrib, | 3250 | save->gs.selector, save->gs.attrib, |
3164 | save->ldtr.limit, save->ldtr.base); | 3251 | save->gs.limit, save->gs.base); |
3165 | pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", | 3252 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3166 | save->idtr.selector, save->idtr.attrib, | 3253 | "gdtr:", |
3167 | save->idtr.limit, save->idtr.base); | 3254 | save->gdtr.selector, save->gdtr.attrib, |
3168 | pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", | 3255 | save->gdtr.limit, save->gdtr.base); |
3169 | save->tr.selector, save->tr.attrib, | 3256 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3170 | save->tr.limit, save->tr.base); | 3257 | "ldtr:", |
3258 | save->ldtr.selector, save->ldtr.attrib, | ||
3259 | save->ldtr.limit, save->ldtr.base); | ||
3260 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", | ||
3261 | "idtr:", | ||
3262 | save->idtr.selector, save->idtr.attrib, | ||
3263 | save->idtr.limit, save->idtr.base); | ||
3264 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", | ||
3265 | "tr:", | ||
3266 | save->tr.selector, save->tr.attrib, | ||
3267 | save->tr.limit, save->tr.base); | ||
3171 | pr_err("cpl: %d efer: %016llx\n", | 3268 | pr_err("cpl: %d efer: %016llx\n", |
3172 | save->cpl, save->efer); | 3269 | save->cpl, save->efer); |
3173 | pr_err("cr0: %016llx cr2: %016llx\n", | 3270 | pr_err("%-15s %016llx %-13s %016llx\n", |
3174 | save->cr0, save->cr2); | 3271 | "cr0:", save->cr0, "cr2:", save->cr2); |
3175 | pr_err("cr3: %016llx cr4: %016llx\n", | 3272 | pr_err("%-15s %016llx %-13s %016llx\n", |
3176 | save->cr3, save->cr4); | 3273 | "cr3:", save->cr3, "cr4:", save->cr4); |
3177 | pr_err("dr6: %016llx dr7: %016llx\n", | 3274 | pr_err("%-15s %016llx %-13s %016llx\n", |
3178 | save->dr6, save->dr7); | 3275 | "dr6:", save->dr6, "dr7:", save->dr7); |
3179 | pr_err("rip: %016llx rflags: %016llx\n", | 3276 | pr_err("%-15s %016llx %-13s %016llx\n", |
3180 | save->rip, save->rflags); | 3277 | "rip:", save->rip, "rflags:", save->rflags); |
3181 | pr_err("rsp: %016llx rax: %016llx\n", | 3278 | pr_err("%-15s %016llx %-13s %016llx\n", |
3182 | save->rsp, save->rax); | 3279 | "rsp:", save->rsp, "rax:", save->rax); |
3183 | pr_err("star: %016llx lstar: %016llx\n", | 3280 | pr_err("%-15s %016llx %-13s %016llx\n", |
3184 | save->star, save->lstar); | 3281 | "star:", save->star, "lstar:", save->lstar); |
3185 | pr_err("cstar: %016llx sfmask: %016llx\n", | 3282 | pr_err("%-15s %016llx %-13s %016llx\n", |
3186 | save->cstar, save->sfmask); | 3283 | "cstar:", save->cstar, "sfmask:", save->sfmask); |
3187 | pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", | 3284 | pr_err("%-15s %016llx %-13s %016llx\n", |
3188 | save->kernel_gs_base, save->sysenter_cs); | 3285 | "kernel_gs_base:", save->kernel_gs_base, |
3189 | pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", | 3286 | "sysenter_cs:", save->sysenter_cs); |
3190 | save->sysenter_esp, save->sysenter_eip); | 3287 | pr_err("%-15s %016llx %-13s %016llx\n", |
3191 | pr_err("gpat: %016llx dbgctl: %016llx\n", | 3288 | "sysenter_esp:", save->sysenter_esp, |
3192 | save->g_pat, save->dbgctl); | 3289 | "sysenter_eip:", save->sysenter_eip); |
3193 | pr_err("br_from: %016llx br_to: %016llx\n", | 3290 | pr_err("%-15s %016llx %-13s %016llx\n", |
3194 | save->br_from, save->br_to); | 3291 | "gpat:", save->g_pat, "dbgctl:", save->dbgctl); |
3195 | pr_err("excp_from: %016llx excp_to: %016llx\n", | 3292 | pr_err("%-15s %016llx %-13s %016llx\n", |
3196 | save->last_excp_from, save->last_excp_to); | 3293 | "br_from:", save->br_from, "br_to:", save->br_to); |
3197 | 3294 | pr_err("%-15s %016llx %-13s %016llx\n", | |
3295 | "excp_from:", save->last_excp_from, | ||
3296 | "excp_to:", save->last_excp_to); | ||
3198 | } | 3297 | } |
3199 | 3298 | ||
3200 | static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | 3299 | static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) |
@@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3384 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) | 3483 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) |
3385 | return 0; | 3484 | return 0; |
3386 | 3485 | ||
3387 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | 3486 | ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); |
3388 | 3487 | ||
3389 | if (is_guest_mode(vcpu)) | 3488 | if (is_guest_mode(vcpu)) |
3390 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | 3489 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); |
@@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
3871 | update_cr0_intercept(svm); | 3970 | update_cr0_intercept(svm); |
3872 | } | 3971 | } |
3873 | 3972 | ||
3973 | #define PRE_EX(exit) { .exit_code = (exit), \ | ||
3974 | .stage = X86_ICPT_PRE_EXCEPT, } | ||
3975 | #define POST_EX(exit) { .exit_code = (exit), \ | ||
3976 | .stage = X86_ICPT_POST_EXCEPT, } | ||
3977 | #define POST_MEM(exit) { .exit_code = (exit), \ | ||
3978 | .stage = X86_ICPT_POST_MEMACCESS, } | ||
3979 | |||
3980 | static struct __x86_intercept { | ||
3981 | u32 exit_code; | ||
3982 | enum x86_intercept_stage stage; | ||
3983 | } x86_intercept_map[] = { | ||
3984 | [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), | ||
3985 | [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), | ||
3986 | [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), | ||
3987 | [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), | ||
3988 | [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), | ||
3989 | [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), | ||
3990 | [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), | ||
3991 | [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), | ||
3992 | [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), | ||
3993 | [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), | ||
3994 | [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), | ||
3995 | [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), | ||
3996 | [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), | ||
3997 | [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), | ||
3998 | [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), | ||
3999 | [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), | ||
4000 | [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), | ||
4001 | [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), | ||
4002 | [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), | ||
4003 | [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), | ||
4004 | [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), | ||
4005 | [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), | ||
4006 | [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), | ||
4007 | [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), | ||
4008 | [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), | ||
4009 | [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), | ||
4010 | [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), | ||
4011 | [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), | ||
4012 | [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), | ||
4013 | [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), | ||
4014 | [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), | ||
4015 | [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), | ||
4016 | [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), | ||
4017 | [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), | ||
4018 | [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), | ||
4019 | [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), | ||
4020 | [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), | ||
4021 | [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), | ||
4022 | [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), | ||
4023 | [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), | ||
4024 | [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), | ||
4025 | [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), | ||
4026 | [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), | ||
4027 | [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), | ||
4028 | [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), | ||
4029 | [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), | ||
4030 | }; | ||
4031 | |||
4032 | #undef PRE_EX | ||
4033 | #undef POST_EX | ||
4034 | #undef POST_MEM | ||
4035 | |||
4036 | static int svm_check_intercept(struct kvm_vcpu *vcpu, | ||
4037 | struct x86_instruction_info *info, | ||
4038 | enum x86_intercept_stage stage) | ||
4039 | { | ||
4040 | struct vcpu_svm *svm = to_svm(vcpu); | ||
4041 | int vmexit, ret = X86EMUL_CONTINUE; | ||
4042 | struct __x86_intercept icpt_info; | ||
4043 | struct vmcb *vmcb = svm->vmcb; | ||
4044 | |||
4045 | if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) | ||
4046 | goto out; | ||
4047 | |||
4048 | icpt_info = x86_intercept_map[info->intercept]; | ||
4049 | |||
4050 | if (stage != icpt_info.stage) | ||
4051 | goto out; | ||
4052 | |||
4053 | switch (icpt_info.exit_code) { | ||
4054 | case SVM_EXIT_READ_CR0: | ||
4055 | if (info->intercept == x86_intercept_cr_read) | ||
4056 | icpt_info.exit_code += info->modrm_reg; | ||
4057 | break; | ||
4058 | case SVM_EXIT_WRITE_CR0: { | ||
4059 | unsigned long cr0, val; | ||
4060 | u64 intercept; | ||
4061 | |||
4062 | if (info->intercept == x86_intercept_cr_write) | ||
4063 | icpt_info.exit_code += info->modrm_reg; | ||
4064 | |||
4065 | if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) | ||
4066 | break; | ||
4067 | |||
4068 | intercept = svm->nested.intercept; | ||
4069 | |||
4070 | if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) | ||
4071 | break; | ||
4072 | |||
4073 | cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; | ||
4074 | val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; | ||
4075 | |||
4076 | if (info->intercept == x86_intercept_lmsw) { | ||
4077 | cr0 &= 0xfUL; | ||
4078 | val &= 0xfUL; | ||
4079 | /* lmsw can't clear PE - catch this here */ | ||
4080 | if (cr0 & X86_CR0_PE) | ||
4081 | val |= X86_CR0_PE; | ||
4082 | } | ||
4083 | |||
4084 | if (cr0 ^ val) | ||
4085 | icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
4086 | |||
4087 | break; | ||
4088 | } | ||
4089 | case SVM_EXIT_READ_DR0: | ||
4090 | case SVM_EXIT_WRITE_DR0: | ||
4091 | icpt_info.exit_code += info->modrm_reg; | ||
4092 | break; | ||
4093 | case SVM_EXIT_MSR: | ||
4094 | if (info->intercept == x86_intercept_wrmsr) | ||
4095 | vmcb->control.exit_info_1 = 1; | ||
4096 | else | ||
4097 | vmcb->control.exit_info_1 = 0; | ||
4098 | break; | ||
4099 | case SVM_EXIT_PAUSE: | ||
4100 | /* | ||
4101 | * We get this for NOP only, but pause | ||
4102 | * is REP NOP, so check that here | ||
4103 | */ | ||
4104 | if (info->rep_prefix != REPE_PREFIX) | ||
4105 | goto out; | ||
4106 | case SVM_EXIT_IOIO: { | ||
4107 | u64 exit_info; | ||
4108 | u32 bytes; | ||
4109 | |||
4110 | exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; | ||
4111 | |||
4112 | if (info->intercept == x86_intercept_in || | ||
4113 | info->intercept == x86_intercept_ins) { | ||
4114 | exit_info |= SVM_IOIO_TYPE_MASK; | ||
4115 | bytes = info->src_bytes; | ||
4116 | } else { | ||
4117 | bytes = info->dst_bytes; | ||
4118 | } | ||
4119 | |||
4120 | if (info->intercept == x86_intercept_outs || | ||
4121 | info->intercept == x86_intercept_ins) | ||
4122 | exit_info |= SVM_IOIO_STR_MASK; | ||
4123 | |||
4124 | if (info->rep_prefix) | ||
4125 | exit_info |= SVM_IOIO_REP_MASK; | ||
4126 | |||
4127 | bytes = min(bytes, 4u); | ||
4128 | |||
4129 | exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; | ||
4130 | |||
4131 | exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); | ||
4132 | |||
4133 | vmcb->control.exit_info_1 = exit_info; | ||
4134 | vmcb->control.exit_info_2 = info->next_rip; | ||
4135 | |||
4136 | break; | ||
4137 | } | ||
4138 | default: | ||
4139 | break; | ||
4140 | } | ||
4141 | |||
4142 | vmcb->control.next_rip = info->next_rip; | ||
4143 | vmcb->control.exit_code = icpt_info.exit_code; | ||
4144 | vmexit = nested_svm_exit_handled(svm); | ||
4145 | |||
4146 | ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED | ||
4147 | : X86EMUL_CONTINUE; | ||
4148 | |||
4149 | out: | ||
4150 | return ret; | ||
4151 | } | ||
4152 | |||
3874 | static struct kvm_x86_ops svm_x86_ops = { | 4153 | static struct kvm_x86_ops svm_x86_ops = { |
3875 | .cpu_has_kvm_support = has_svm, | 4154 | .cpu_has_kvm_support = has_svm, |
3876 | .disabled_by_bios = is_disabled, | 4155 | .disabled_by_bios = is_disabled, |
@@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3952 | 4231 | ||
3953 | .has_wbinvd_exit = svm_has_wbinvd_exit, | 4232 | .has_wbinvd_exit = svm_has_wbinvd_exit, |
3954 | 4233 | ||
4234 | .set_tsc_khz = svm_set_tsc_khz, | ||
3955 | .write_tsc_offset = svm_write_tsc_offset, | 4235 | .write_tsc_offset = svm_write_tsc_offset, |
3956 | .adjust_tsc_offset = svm_adjust_tsc_offset, | 4236 | .adjust_tsc_offset = svm_adjust_tsc_offset, |
4237 | .compute_tsc_offset = svm_compute_tsc_offset, | ||
3957 | 4238 | ||
3958 | .set_tdp_cr3 = set_tdp_cr3, | 4239 | .set_tdp_cr3 = set_tdp_cr3, |
4240 | |||
4241 | .check_intercept = svm_check_intercept, | ||
3959 | }; | 4242 | }; |
3960 | 4243 | ||
3961 | static int __init svm_init(void) | 4244 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcbd154c..4c3fa0f67469 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -128,8 +128,11 @@ struct vcpu_vmx { | |||
128 | unsigned long host_rsp; | 128 | unsigned long host_rsp; |
129 | int launched; | 129 | int launched; |
130 | u8 fail; | 130 | u8 fail; |
131 | u8 cpl; | ||
132 | bool nmi_known_unmasked; | ||
131 | u32 exit_intr_info; | 133 | u32 exit_intr_info; |
132 | u32 idt_vectoring_info; | 134 | u32 idt_vectoring_info; |
135 | ulong rflags; | ||
133 | struct shared_msr_entry *guest_msrs; | 136 | struct shared_msr_entry *guest_msrs; |
134 | int nmsrs; | 137 | int nmsrs; |
135 | int save_nmsrs; | 138 | int save_nmsrs; |
@@ -159,6 +162,10 @@ struct vcpu_vmx { | |||
159 | u32 ar; | 162 | u32 ar; |
160 | } tr, es, ds, fs, gs; | 163 | } tr, es, ds, fs, gs; |
161 | } rmode; | 164 | } rmode; |
165 | struct { | ||
166 | u32 bitmask; /* 4 bits per segment (1 bit per field) */ | ||
167 | struct kvm_save_segment seg[8]; | ||
168 | } segment_cache; | ||
162 | int vpid; | 169 | int vpid; |
163 | bool emulation_required; | 170 | bool emulation_required; |
164 | 171 | ||
@@ -171,6 +178,15 @@ struct vcpu_vmx { | |||
171 | bool rdtscp_enabled; | 178 | bool rdtscp_enabled; |
172 | }; | 179 | }; |
173 | 180 | ||
181 | enum segment_cache_field { | ||
182 | SEG_FIELD_SEL = 0, | ||
183 | SEG_FIELD_BASE = 1, | ||
184 | SEG_FIELD_LIMIT = 2, | ||
185 | SEG_FIELD_AR = 3, | ||
186 | |||
187 | SEG_FIELD_NR = 4 | ||
188 | }; | ||
189 | |||
174 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 190 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
175 | { | 191 | { |
176 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 192 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) | |||
643 | vmcs_writel(field, vmcs_readl(field) | mask); | 659 | vmcs_writel(field, vmcs_readl(field) | mask); |
644 | } | 660 | } |
645 | 661 | ||
662 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) | ||
663 | { | ||
664 | vmx->segment_cache.bitmask = 0; | ||
665 | } | ||
666 | |||
667 | static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, | ||
668 | unsigned field) | ||
669 | { | ||
670 | bool ret; | ||
671 | u32 mask = 1 << (seg * SEG_FIELD_NR + field); | ||
672 | |||
673 | if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { | ||
674 | vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); | ||
675 | vmx->segment_cache.bitmask = 0; | ||
676 | } | ||
677 | ret = vmx->segment_cache.bitmask & mask; | ||
678 | vmx->segment_cache.bitmask |= mask; | ||
679 | return ret; | ||
680 | } | ||
681 | |||
682 | static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) | ||
683 | { | ||
684 | u16 *p = &vmx->segment_cache.seg[seg].selector; | ||
685 | |||
686 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) | ||
687 | *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); | ||
688 | return *p; | ||
689 | } | ||
690 | |||
691 | static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) | ||
692 | { | ||
693 | ulong *p = &vmx->segment_cache.seg[seg].base; | ||
694 | |||
695 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) | ||
696 | *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); | ||
697 | return *p; | ||
698 | } | ||
699 | |||
700 | static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) | ||
701 | { | ||
702 | u32 *p = &vmx->segment_cache.seg[seg].limit; | ||
703 | |||
704 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) | ||
705 | *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); | ||
706 | return *p; | ||
707 | } | ||
708 | |||
709 | static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) | ||
710 | { | ||
711 | u32 *p = &vmx->segment_cache.seg[seg].ar; | ||
712 | |||
713 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) | ||
714 | *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); | ||
715 | return *p; | ||
716 | } | ||
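
The segment cache introduced here keeps one bit per (segment, field) pair; 8 segments times 4 fields fits exactly in a 32-bit mask, so each VMCS segment field is read at most once between cache flushes. A standalone sketch of the test-and-set helper, with hypothetical names and a plain array standing in for the VMCS reads:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_SEGS		8	/* ES, CS, SS, DS, FS, GS, TR, LDTR  */
#define SEG_FIELD_NR	4	/* selector, base, limit, AR         */

struct seg_cache {
	uint32_t bitmask;	/* 4 bits per segment, one per field */
	uint32_t value[NR_SEGS][SEG_FIELD_NR];
};

/* Returns true if the field was already cached; otherwise marks it
 * cached so the caller performs the expensive read exactly once. */
static bool cache_test_set(struct seg_cache *c, unsigned seg, unsigned field)
{
	uint32_t mask = 1u << (seg * SEG_FIELD_NR + field);
	bool hit = c->bitmask & mask;

	c->bitmask |= mask;
	return hit;
}

int main(void)
{
	struct seg_cache c = { 0 };
	int first  = cache_test_set(&c, 1, 2);
	int second = cache_test_set(&c, 1, 2);

	printf("%d %d\n", first, second);	/* 0 1: miss, then hit */
	return 0;
}
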
717 | |||
646 | static void update_exception_bitmap(struct kvm_vcpu *vcpu) | 718 | static void update_exception_bitmap(struct kvm_vcpu *vcpu) |
647 | { | 719 | { |
648 | u32 eb; | 720 | u32 eb; |
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
970 | { | 1042 | { |
971 | unsigned long rflags, save_rflags; | 1043 | unsigned long rflags, save_rflags; |
972 | 1044 | ||
973 | rflags = vmcs_readl(GUEST_RFLAGS); | 1045 | if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { |
974 | if (to_vmx(vcpu)->rmode.vm86_active) { | 1046 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); |
975 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; | 1047 | rflags = vmcs_readl(GUEST_RFLAGS); |
976 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | 1048 | if (to_vmx(vcpu)->rmode.vm86_active) { |
977 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | 1049 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
1050 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||
1051 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||
1052 | } | ||
1053 | to_vmx(vcpu)->rflags = rflags; | ||
978 | } | 1054 | } |
979 | return rflags; | 1055 | return to_vmx(vcpu)->rflags; |
980 | } | 1056 | } |
981 | 1057 | ||
982 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1058 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
983 | { | 1059 | { |
1060 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); | ||
1061 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
1062 | to_vmx(vcpu)->rflags = rflags; | ||
984 | if (to_vmx(vcpu)->rmode.vm86_active) { | 1063 | if (to_vmx(vcpu)->rmode.vm86_active) { |
985 | to_vmx(vcpu)->rmode.save_rflags = rflags; | 1064 | to_vmx(vcpu)->rmode.save_rflags = rflags; |
986 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1065 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1053 | } | 1132 | } |
1054 | 1133 | ||
1055 | if (vmx->rmode.vm86_active) { | 1134 | if (vmx->rmode.vm86_active) { |
1056 | if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) | 1135 | int inc_eip = 0; |
1136 | if (kvm_exception_is_soft(nr)) | ||
1137 | inc_eip = vcpu->arch.event_exit_inst_len; | ||
1138 | if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) | ||
1057 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 1139 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
1058 | return; | 1140 | return; |
1059 | } | 1141 | } |
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void) | |||
1151 | } | 1233 | } |
1152 | 1234 | ||
1153 | /* | 1235 | /* |
1236 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ | ||
1237 | * ioctl. In this case the call-back should update internal vmx state to make | ||
1238 | * the changes effective. | ||
1239 | */ | ||
1240 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | ||
1241 | { | ||
1242 | /* Nothing to do here */ | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1154 | * writes 'offset' into guest's timestamp counter offset register | 1246 | * writes 'offset' into guest's timestamp counter offset register |
1155 | */ | 1247 | */ |
1156 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 1248 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | |||
1164 | vmcs_write64(TSC_OFFSET, offset + adjustment); | 1256 | vmcs_write64(TSC_OFFSET, offset + adjustment); |
1165 | } | 1257 | } |
1166 | 1258 | ||
1259 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | ||
1260 | { | ||
1261 | return target_tsc - native_read_tsc(); | ||
1262 | } | ||
1263 | |||
1167 | /* | 1264 | /* |
1168 | * Reads an msr value (of 'msr_index') into 'pdata'. | 1265 | * Reads an msr value (of 'msr_index') into 'pdata'. |
1169 | * Returns 0 on success, non-0 otherwise. | 1266 | * Returns 0 on success, non-0 otherwise. |
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1243 | break; | 1340 | break; |
1244 | #ifdef CONFIG_X86_64 | 1341 | #ifdef CONFIG_X86_64 |
1245 | case MSR_FS_BASE: | 1342 | case MSR_FS_BASE: |
1343 | vmx_segment_cache_clear(vmx); | ||
1246 | vmcs_writel(GUEST_FS_BASE, data); | 1344 | vmcs_writel(GUEST_FS_BASE, data); |
1247 | break; | 1345 | break; |
1248 | case MSR_GS_BASE: | 1346 | case MSR_GS_BASE: |
1347 | vmx_segment_cache_clear(vmx); | ||
1249 | vmcs_writel(GUEST_GS_BASE, data); | 1348 | vmcs_writel(GUEST_GS_BASE, data); |
1250 | break; | 1349 | break; |
1251 | case MSR_KERNEL_GS_BASE: | 1350 | case MSR_KERNEL_GS_BASE: |
@@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1689 | vmx->emulation_required = 1; | 1788 | vmx->emulation_required = 1; |
1690 | vmx->rmode.vm86_active = 0; | 1789 | vmx->rmode.vm86_active = 0; |
1691 | 1790 | ||
1791 | vmx_segment_cache_clear(vmx); | ||
1792 | |||
1692 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); | 1793 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); |
1693 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); | 1794 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); |
1694 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); | 1795 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); |
@@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1712 | fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); | 1813 | fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); |
1713 | fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); | 1814 | fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); |
1714 | 1815 | ||
1816 | vmx_segment_cache_clear(vmx); | ||
1817 | |||
1715 | vmcs_write16(GUEST_SS_SELECTOR, 0); | 1818 | vmcs_write16(GUEST_SS_SELECTOR, 0); |
1716 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); | 1819 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); |
1717 | 1820 | ||
@@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1775 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 1878 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
1776 | } | 1879 | } |
1777 | 1880 | ||
1881 | vmx_segment_cache_clear(vmx); | ||
1882 | |||
1778 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); | 1883 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); |
1779 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1884 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1780 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1885 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
@@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1851 | { | 1956 | { |
1852 | u32 guest_tr_ar; | 1957 | u32 guest_tr_ar; |
1853 | 1958 | ||
1959 | vmx_segment_cache_clear(to_vmx(vcpu)); | ||
1960 | |||
1854 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); | 1961 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); |
1855 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { | 1962 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { |
1856 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", | 1963 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", |
@@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1998 | vmcs_writel(CR0_READ_SHADOW, cr0); | 2105 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1999 | vmcs_writel(GUEST_CR0, hw_cr0); | 2106 | vmcs_writel(GUEST_CR0, hw_cr0); |
2000 | vcpu->arch.cr0 = cr0; | 2107 | vcpu->arch.cr0 = cr0; |
2108 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
2001 | } | 2109 | } |
2002 | 2110 | ||
2003 | static u64 construct_eptp(unsigned long root_hpa) | 2111 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2053 | struct kvm_segment *var, int seg) | 2161 | struct kvm_segment *var, int seg) |
2054 | { | 2162 | { |
2055 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2163 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2056 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2057 | struct kvm_save_segment *save; | 2164 | struct kvm_save_segment *save; |
2058 | u32 ar; | 2165 | u32 ar; |
2059 | 2166 | ||
@@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2075 | var->limit = save->limit; | 2182 | var->limit = save->limit; |
2076 | ar = save->ar; | 2183 | ar = save->ar; |
2077 | if (seg == VCPU_SREG_TR | 2184 | if (seg == VCPU_SREG_TR |
2078 | || var->selector == vmcs_read16(sf->selector)) | 2185 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) |
2079 | goto use_saved_rmode_seg; | 2186 | goto use_saved_rmode_seg; |
2080 | } | 2187 | } |
2081 | var->base = vmcs_readl(sf->base); | 2188 | var->base = vmx_read_guest_seg_base(vmx, seg); |
2082 | var->limit = vmcs_read32(sf->limit); | 2189 | var->limit = vmx_read_guest_seg_limit(vmx, seg); |
2083 | var->selector = vmcs_read16(sf->selector); | 2190 | var->selector = vmx_read_guest_seg_selector(vmx, seg); |
2084 | ar = vmcs_read32(sf->ar_bytes); | 2191 | ar = vmx_read_guest_seg_ar(vmx, seg); |
2085 | use_saved_rmode_seg: | 2192 | use_saved_rmode_seg: |
2086 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | 2193 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) |
2087 | ar = 0; | 2194 | ar = 0; |
@@ -2098,27 +2205,37 @@ use_saved_rmode_seg: | |||
2098 | 2205 | ||
2099 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | 2206 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) |
2100 | { | 2207 | { |
2101 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2102 | struct kvm_segment s; | 2208 | struct kvm_segment s; |
2103 | 2209 | ||
2104 | if (to_vmx(vcpu)->rmode.vm86_active) { | 2210 | if (to_vmx(vcpu)->rmode.vm86_active) { |
2105 | vmx_get_segment(vcpu, &s, seg); | 2211 | vmx_get_segment(vcpu, &s, seg); |
2106 | return s.base; | 2212 | return s.base; |
2107 | } | 2213 | } |
2108 | return vmcs_readl(sf->base); | 2214 | return vmx_read_guest_seg_base(to_vmx(vcpu), seg); |
2109 | } | 2215 | } |
2110 | 2216 | ||
2111 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 2217 | static int __vmx_get_cpl(struct kvm_vcpu *vcpu) |
2112 | { | 2218 | { |
2113 | if (!is_protmode(vcpu)) | 2219 | if (!is_protmode(vcpu)) |
2114 | return 0; | 2220 | return 0; |
2115 | 2221 | ||
2116 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 2222 | if (!is_long_mode(vcpu) |
2223 | && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ | ||
2117 | return 3; | 2224 | return 3; |
2118 | 2225 | ||
2119 | return vmcs_read16(GUEST_CS_SELECTOR) & 3; | 2226 | return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; |
2120 | } | 2227 | } |
2121 | 2228 | ||
2229 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | ||
2230 | { | ||
2231 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { | ||
2232 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
2233 | to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); | ||
2234 | } | ||
2235 | return to_vmx(vcpu)->cpl; | ||
2236 | } | ||
2237 | |||
2238 | |||
2122 | static u32 vmx_segment_access_rights(struct kvm_segment *var) | 2239 | static u32 vmx_segment_access_rights(struct kvm_segment *var) |
2123 | { | 2240 | { |
2124 | u32 ar; | 2241 | u32 ar; |
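The hunks above split CPL lookup in two: __vmx_get_cpl() keeps the real VMCS read, while vmx_get_cpl() caches the result behind the new VCPU_EXREG_CPL bit in regs_avail; the vmx_set_cr0() hunk earlier and the vmx_set_segment() hunk below clear that bit so the next lookup refetches. A rough, standalone illustration of that lazy-cache pattern follows -- struct vcpu_cache, CPL_VALID and read_cpl_from_hw() are invented names for the example, not kernel code.

#include <stdio.h>

#define CPL_VALID (1u << 0)          /* plays the role of VCPU_EXREG_CPL */

struct vcpu_cache {
    unsigned int avail;              /* bitmap: which cached values are valid */
    int cpl;                         /* cached current privilege level */
};

static int read_cpl_from_hw(void)    /* stands in for __vmx_get_cpl() */
{
    printf("expensive VMCS read\n");
    return 3;                        /* pretend the guest runs in ring 3 */
}

static int get_cpl(struct vcpu_cache *c)          /* mirrors vmx_get_cpl() */
{
    if (!(c->avail & CPL_VALID)) {
        c->cpl = read_cpl_from_hw(); /* fetch once ... */
        c->avail |= CPL_VALID;       /* ... and remember that it is valid */
    }
    return c->cpl;
}

static void invalidate_cpl(struct vcpu_cache *c)  /* like the __clear_bit() on CR0/segment writes */
{
    c->avail &= ~CPL_VALID;
}

int main(void)
{
    struct vcpu_cache c = { 0 };
    get_cpl(&c);                     /* reads "hardware" */
    get_cpl(&c);                     /* served from the cache */
    invalidate_cpl(&c);              /* e.g. after a CR0 or segment write */
    get_cpl(&c);                     /* reads "hardware" again */
    return 0;
}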
@@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2148 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2265 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2149 | u32 ar; | 2266 | u32 ar; |
2150 | 2267 | ||
2268 | vmx_segment_cache_clear(vmx); | ||
2269 | |||
2151 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 2270 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
2152 | vmcs_write16(sf->selector, var->selector); | 2271 | vmcs_write16(sf->selector, var->selector); |
2153 | vmx->rmode.tr.selector = var->selector; | 2272 | vmx->rmode.tr.selector = var->selector; |
@@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2184 | ar |= 0x1; /* Accessed */ | 2303 | ar |= 0x1; /* Accessed */ |
2185 | 2304 | ||
2186 | vmcs_write32(sf->ar_bytes, ar); | 2305 | vmcs_write32(sf->ar_bytes, ar); |
2306 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
2187 | } | 2307 | } |
2188 | 2308 | ||
2189 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 2309 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
2190 | { | 2310 | { |
2191 | u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); | 2311 | u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); |
2192 | 2312 | ||
2193 | *db = (ar >> 14) & 1; | 2313 | *db = (ar >> 14) & 1; |
2194 | *l = (ar >> 13) & 1; | 2314 | *l = (ar >> 13) & 1; |
@@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2775 | if (ret != 0) | 2895 | if (ret != 0) |
2776 | goto out; | 2896 | goto out; |
2777 | 2897 | ||
2898 | vmx_segment_cache_clear(vmx); | ||
2899 | |||
2778 | seg_setup(VCPU_SREG_CS); | 2900 | seg_setup(VCPU_SREG_CS); |
2779 | /* | 2901 | /* |
2780 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | 2902 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode |
@@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
2904 | 3026 | ||
2905 | ++vcpu->stat.irq_injections; | 3027 | ++vcpu->stat.irq_injections; |
2906 | if (vmx->rmode.vm86_active) { | 3028 | if (vmx->rmode.vm86_active) { |
2907 | if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) | 3029 | int inc_eip = 0; |
3030 | if (vcpu->arch.interrupt.soft) | ||
3031 | inc_eip = vcpu->arch.event_exit_inst_len; | ||
3032 | if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) | ||
2908 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 3033 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2909 | return; | 3034 | return; |
2910 | } | 3035 | } |
@@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2937 | } | 3062 | } |
2938 | 3063 | ||
2939 | ++vcpu->stat.nmi_injections; | 3064 | ++vcpu->stat.nmi_injections; |
3065 | vmx->nmi_known_unmasked = false; | ||
2940 | if (vmx->rmode.vm86_active) { | 3066 | if (vmx->rmode.vm86_active) { |
2941 | if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) | 3067 | if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) |
2942 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 3068 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2943 | return; | 3069 | return; |
2944 | } | 3070 | } |
@@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | |||
2961 | { | 3087 | { |
2962 | if (!cpu_has_virtual_nmis()) | 3088 | if (!cpu_has_virtual_nmis()) |
2963 | return to_vmx(vcpu)->soft_vnmi_blocked; | 3089 | return to_vmx(vcpu)->soft_vnmi_blocked; |
3090 | if (to_vmx(vcpu)->nmi_known_unmasked) | ||
3091 | return false; | ||
2964 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; | 3092 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; |
2965 | } | 3093 | } |
2966 | 3094 | ||
@@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
2974 | vmx->vnmi_blocked_time = 0; | 3102 | vmx->vnmi_blocked_time = 0; |
2975 | } | 3103 | } |
2976 | } else { | 3104 | } else { |
3105 | vmx->nmi_known_unmasked = !masked; | ||
2977 | if (masked) | 3106 | if (masked) |
2978 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3107 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
2979 | GUEST_INTR_STATE_NMI); | 3108 | GUEST_INTR_STATE_NMI); |
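vmx_get_nmi_mask()/vmx_set_nmi_mask() gain an nmi_known_unmasked flag so the common "NMIs are not blocked" answer no longer costs a vmcs_read32() of GUEST_INTERRUPTIBILITY_INFO. A standalone sketch of that shortcut -- read_guest_interruptibility() is an invented stand-in for the VMCS read, not a KVM function:

#include <stdbool.h>
#include <stdio.h>

#define GUEST_INTR_STATE_NMI 0x8

struct vmx_state {
    bool nmi_known_unmasked;
    unsigned int guest_intr_state;   /* pretend GUEST_INTERRUPTIBILITY_INFO */
};

static unsigned int read_guest_interruptibility(struct vmx_state *v)
{
    printf("vmcs read\n");
    return v->guest_intr_state;
}

static bool get_nmi_mask(struct vmx_state *v)     /* mirrors vmx_get_nmi_mask() */
{
    if (v->nmi_known_unmasked)
        return false;                             /* fast path, no vmcs read */
    return read_guest_interruptibility(v) & GUEST_INTR_STATE_NMI;
}

static void set_nmi_mask(struct vmx_state *v, bool masked)  /* mirrors vmx_set_nmi_mask() */
{
    v->nmi_known_unmasked = !masked;
    if (masked)
        v->guest_intr_state |= GUEST_INTR_STATE_NMI;
    else
        v->guest_intr_state &= ~GUEST_INTR_STATE_NMI;
}

int main(void)
{
    struct vmx_state v = { 0 };
    set_nmi_mask(&v, true);
    printf("masked: %d\n", get_nmi_mask(&v));     /* has to read "hardware" */
    set_nmi_mask(&v, false);
    printf("masked: %d\n", get_nmi_mask(&v));     /* answered from the flag */
    return 0;
}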
@@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3091 | enum emulation_result er; | 3220 | enum emulation_result er; |
3092 | 3221 | ||
3093 | vect_info = vmx->idt_vectoring_info; | 3222 | vect_info = vmx->idt_vectoring_info; |
3094 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3223 | intr_info = vmx->exit_intr_info; |
3095 | 3224 | ||
3096 | if (is_machine_check(intr_info)) | 3225 | if (is_machine_check(intr_info)) |
3097 | return handle_machine_check(vcpu); | 3226 | return handle_machine_check(vcpu); |
@@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3122 | } | 3251 | } |
3123 | 3252 | ||
3124 | error_code = 0; | 3253 | error_code = 0; |
3125 | rip = kvm_rip_read(vcpu); | ||
3126 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 3254 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
3127 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 3255 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
3128 | if (is_page_fault(intr_info)) { | 3256 | if (is_page_fault(intr_info)) { |
@@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3169 | vmx->vcpu.arch.event_exit_inst_len = | 3297 | vmx->vcpu.arch.event_exit_inst_len = |
3170 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 3298 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
3171 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3299 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
3300 | rip = kvm_rip_read(vcpu); | ||
3172 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 3301 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
3173 | kvm_run->debug.arch.exception = ex_no; | 3302 | kvm_run->debug.arch.exception = ex_no; |
3174 | break; | 3303 | break; |
@@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3505 | switch (type) { | 3634 | switch (type) { |
3506 | case INTR_TYPE_NMI_INTR: | 3635 | case INTR_TYPE_NMI_INTR: |
3507 | vcpu->arch.nmi_injected = false; | 3636 | vcpu->arch.nmi_injected = false; |
3508 | if (cpu_has_virtual_nmis()) | 3637 | vmx_set_nmi_mask(vcpu, true); |
3509 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
3510 | GUEST_INTR_STATE_NMI); | ||
3511 | break; | 3638 | break; |
3512 | case INTR_TYPE_EXT_INTR: | 3639 | case INTR_TYPE_EXT_INTR: |
3513 | case INTR_TYPE_SOFT_INTR: | 3640 | case INTR_TYPE_SOFT_INTR: |
@@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3867 | 3994 | ||
3868 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 3995 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
3869 | { | 3996 | { |
3870 | u32 exit_intr_info = vmx->exit_intr_info; | 3997 | u32 exit_intr_info; |
3998 | |||
3999 | if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY | ||
4000 | || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) | ||
4001 | return; | ||
4002 | |||
4003 | vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
4004 | exit_intr_info = vmx->exit_intr_info; | ||
3871 | 4005 | ||
3872 | /* Handle machine checks before interrupts are enabled */ | 4006 | /* Handle machine checks before interrupts are enabled */ |
3873 | if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) | 4007 | if (is_machine_check(exit_intr_info)) |
3874 | || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI | ||
3875 | && is_machine_check(exit_intr_info))) | ||
3876 | kvm_machine_check(); | 4008 | kvm_machine_check(); |
3877 | 4009 | ||
3878 | /* We need to handle NMIs before interrupts are enabled */ | 4010 | /* We need to handle NMIs before interrupts are enabled */ |
@@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | |||
3886 | 4018 | ||
3887 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | 4019 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
3888 | { | 4020 | { |
3889 | u32 exit_intr_info = vmx->exit_intr_info; | 4021 | u32 exit_intr_info; |
3890 | bool unblock_nmi; | 4022 | bool unblock_nmi; |
3891 | u8 vector; | 4023 | u8 vector; |
3892 | bool idtv_info_valid; | 4024 | bool idtv_info_valid; |
@@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
3894 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 4026 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
3895 | 4027 | ||
3896 | if (cpu_has_virtual_nmis()) { | 4028 | if (cpu_has_virtual_nmis()) { |
4029 | if (vmx->nmi_known_unmasked) | ||
4030 | return; | ||
4031 | /* | ||
4032 | * Can't use vmx->exit_intr_info since we're not sure what | ||
4033 | * the exit reason is. | ||
4034 | */ | ||
4035 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3897 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; | 4036 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
3898 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 4037 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
3899 | /* | 4038 | /* |
@@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
3910 | vector != DF_VECTOR && !idtv_info_valid) | 4049 | vector != DF_VECTOR && !idtv_info_valid) |
3911 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 4050 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3912 | GUEST_INTR_STATE_NMI); | 4051 | GUEST_INTR_STATE_NMI); |
4052 | else | ||
4053 | vmx->nmi_known_unmasked = | ||
4054 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | ||
4055 | & GUEST_INTR_STATE_NMI); | ||
3913 | } else if (unlikely(vmx->soft_vnmi_blocked)) | 4056 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
3914 | vmx->vnmi_blocked_time += | 4057 | vmx->vnmi_blocked_time += |
3915 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 4058 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
@@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
3946 | * Clear bit "block by NMI" before VM entry if a NMI | 4089 | * Clear bit "block by NMI" before VM entry if a NMI |
3947 | * delivery faulted. | 4090 | * delivery faulted. |
3948 | */ | 4091 | */ |
3949 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | 4092 | vmx_set_nmi_mask(&vmx->vcpu, false); |
3950 | GUEST_INTR_STATE_NMI); | ||
3951 | break; | 4093 | break; |
3952 | case INTR_TYPE_SOFT_EXCEPTION: | 4094 | case INTR_TYPE_SOFT_EXCEPTION: |
3953 | vmx->vcpu.arch.event_exit_inst_len = | 4095 | vmx->vcpu.arch.event_exit_inst_len = |
@@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4124 | ); | 4266 | ); |
4125 | 4267 | ||
4126 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | 4268 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) |
4269 | | (1 << VCPU_EXREG_RFLAGS) | ||
4270 | | (1 << VCPU_EXREG_CPL) | ||
4127 | | (1 << VCPU_EXREG_PDPTR) | 4271 | | (1 << VCPU_EXREG_PDPTR) |
4272 | | (1 << VCPU_EXREG_SEGMENTS) | ||
4128 | | (1 << VCPU_EXREG_CR3)); | 4273 | | (1 << VCPU_EXREG_CR3)); |
4129 | vcpu->arch.regs_dirty = 0; | 4274 | vcpu->arch.regs_dirty = 0; |
4130 | 4275 | ||
@@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4134 | vmx->launched = 1; | 4279 | vmx->launched = 1; |
4135 | 4280 | ||
4136 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 4281 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
4137 | vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
4138 | 4282 | ||
4139 | vmx_complete_atomic_exit(vmx); | 4283 | vmx_complete_atomic_exit(vmx); |
4140 | vmx_recover_nmi_blocking(vmx); | 4284 | vmx_recover_nmi_blocking(vmx); |
@@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4195 | goto free_vcpu; | 4339 | goto free_vcpu; |
4196 | 4340 | ||
4197 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 4341 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
4342 | err = -ENOMEM; | ||
4198 | if (!vmx->guest_msrs) { | 4343 | if (!vmx->guest_msrs) { |
4199 | err = -ENOMEM; | ||
4200 | goto uninit_vcpu; | 4344 | goto uninit_vcpu; |
4201 | } | 4345 | } |
4202 | 4346 | ||
@@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4215 | if (err) | 4359 | if (err) |
4216 | goto free_vmcs; | 4360 | goto free_vmcs; |
4217 | if (vm_need_virtualize_apic_accesses(kvm)) | 4361 | if (vm_need_virtualize_apic_accesses(kvm)) |
4218 | if (alloc_apic_access_page(kvm) != 0) | 4362 | err = alloc_apic_access_page(kvm); |
4363 | if (err) | ||
4219 | goto free_vmcs; | 4364 | goto free_vmcs; |
4220 | 4365 | ||
4221 | if (enable_ept) { | 4366 | if (enable_ept) { |
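The two error-path hunks above preset err = -ENOMEM before the guest_msrs allocation check and propagate alloc_apic_access_page()'s own return code instead of collapsing it, keeping the goto-based unwinding in vmx_create_vcpu() uniform. A minimal sketch of that unwinding idiom, with the error code set up front -- alloc_a()/alloc_b() are placeholders, not KVM functions:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static void *alloc_a(void) { return malloc(16); }
static void *alloc_b(void) { return malloc(16); }

static int create(void)
{
    void *a, *b;
    int err;

    err = -ENOMEM;                   /* preset, so failure paths just jump */
    a = alloc_a();
    if (!a)
        goto out;

    b = alloc_b();
    if (!b)
        goto free_a;                 /* unwind only what already exists */

    printf("created\n");
    free(b);
    free(a);
    return 0;

free_a:
    free(a);
out:
    return err;
}

int main(void)
{
    return create() ? 1 : 0;
}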
@@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
4368 | { | 4513 | { |
4369 | } | 4514 | } |
4370 | 4515 | ||
4516 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | ||
4517 | struct x86_instruction_info *info, | ||
4518 | enum x86_intercept_stage stage) | ||
4519 | { | ||
4520 | return X86EMUL_CONTINUE; | ||
4521 | } | ||
4522 | |||
4371 | static struct kvm_x86_ops vmx_x86_ops = { | 4523 | static struct kvm_x86_ops vmx_x86_ops = { |
4372 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4524 | .cpu_has_kvm_support = cpu_has_kvm_support, |
4373 | .disabled_by_bios = vmx_disabled_by_bios, | 4525 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4449 | 4601 | ||
4450 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | 4602 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, |
4451 | 4603 | ||
4604 | .set_tsc_khz = vmx_set_tsc_khz, | ||
4452 | .write_tsc_offset = vmx_write_tsc_offset, | 4605 | .write_tsc_offset = vmx_write_tsc_offset, |
4453 | .adjust_tsc_offset = vmx_adjust_tsc_offset, | 4606 | .adjust_tsc_offset = vmx_adjust_tsc_offset, |
4607 | .compute_tsc_offset = vmx_compute_tsc_offset, | ||
4454 | 4608 | ||
4455 | .set_tdp_cr3 = vmx_set_cr3, | 4609 | .set_tdp_cr3 = vmx_set_cr3, |
4610 | |||
4611 | .check_intercept = vmx_check_intercept, | ||
4456 | }; | 4612 | }; |
4457 | 4613 | ||
4458 | static int __init vmx_init(void) | 4614 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6b0bf9..77c9d8673dc4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -60,22 +60,12 @@ | |||
60 | #include <asm/div64.h> | 60 | #include <asm/div64.h> |
61 | 61 | ||
62 | #define MAX_IO_MSRS 256 | 62 | #define MAX_IO_MSRS 256 |
63 | #define CR0_RESERVED_BITS \ | ||
64 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | ||
65 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | ||
66 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | ||
67 | #define CR4_RESERVED_BITS \ | ||
68 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
69 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
70 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
71 | | X86_CR4_OSXSAVE \ | ||
72 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
73 | |||
74 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
75 | |||
76 | #define KVM_MAX_MCE_BANKS 32 | 63 | #define KVM_MAX_MCE_BANKS 32 |
77 | #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) | 64 | #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) |
78 | 65 | ||
66 | #define emul_to_vcpu(ctxt) \ | ||
67 | container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) | ||
68 | |||
79 | /* EFER defaults: | 69 | /* EFER defaults: |
80 | * - enable syscall per default because its emulated by KVM | 70 | * - enable syscall per default because its emulated by KVM |
81 | * - enable LME and LMA per default on 64 bit KVM | 71 | * - enable LME and LMA per default on 64 bit KVM |
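From here on the emulator callbacks take the x86_emulate_ctxt as their first argument and recover the vcpu through the new emul_to_vcpu() container_of() macro, instead of being handed a struct kvm_vcpu * directly. A self-contained illustration of that container_of pattern -- the struct names are invented for the example:

#include <stddef.h>
#include <stdio.h>

/* container_of() as found in the kernel, reduced to what the example needs. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct emulate_ctxt { int eip; };

struct vcpu {
    int id;
    struct arch_state {
        struct emulate_ctxt emulate_ctxt;
    } arch;
};

/* Mirrors emul_to_vcpu(): from the embedded context back to the enclosing vcpu. */
#define emul_to_vcpu(ctxt) \
    container_of(ctxt, struct vcpu, arch.emulate_ctxt)

static void callback(struct emulate_ctxt *ctxt)
{
    struct vcpu *v = emul_to_vcpu(ctxt);
    printf("vcpu %d, eip %d\n", v->id, ctxt->eip);
}

int main(void)
{
    struct vcpu v = { .id = 7, .arch.emulate_ctxt.eip = 0x100 };
    callback(&v.arch.emulate_ctxt);   /* prints "vcpu 7, eip 256" */
    return 0;
}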
@@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
100 | int ignore_msrs = 0; | 90 | int ignore_msrs = 0; |
101 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 91 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
102 | 92 | ||
93 | bool kvm_has_tsc_control; | ||
94 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | ||
95 | u32 kvm_max_guest_tsc_khz; | ||
96 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | ||
97 | |||
103 | #define KVM_NR_SHARED_MSRS 16 | 98 | #define KVM_NR_SHARED_MSRS 16 |
104 | 99 | ||
105 | struct kvm_shared_msrs_global { | 100 | struct kvm_shared_msrs_global { |
@@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
157 | 152 | ||
158 | u64 __read_mostly host_xcr0; | 153 | u64 __read_mostly host_xcr0; |
159 | 154 | ||
155 | int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); | ||
156 | |||
160 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | 157 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) |
161 | { | 158 | { |
162 | int i; | 159 | int i; |
@@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
361 | 358 | ||
362 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 359 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
363 | { | 360 | { |
364 | kvm_make_request(KVM_REQ_NMI, vcpu); | ||
365 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 361 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
362 | vcpu->arch.nmi_pending = 1; | ||
366 | } | 363 | } |
367 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 364 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
368 | 365 | ||
@@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void) | |||
982 | return ret; | 979 | return ret; |
983 | } | 980 | } |
984 | 981 | ||
985 | static inline u64 nsec_to_cycles(u64 nsec) | 982 | static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) |
983 | { | ||
984 | if (vcpu->arch.virtual_tsc_khz) | ||
985 | return vcpu->arch.virtual_tsc_khz; | ||
986 | else | ||
987 | return __this_cpu_read(cpu_tsc_khz); | ||
988 | } | ||
989 | |||
990 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | ||
986 | { | 991 | { |
987 | u64 ret; | 992 | u64 ret; |
988 | 993 | ||
@@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec) | |||
990 | if (kvm_tsc_changes_freq()) | 995 | if (kvm_tsc_changes_freq()) |
991 | printk_once(KERN_WARNING | 996 | printk_once(KERN_WARNING |
992 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | 997 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); |
993 | ret = nsec * __this_cpu_read(cpu_tsc_khz); | 998 | ret = nsec * vcpu_tsc_khz(vcpu); |
994 | do_div(ret, USEC_PER_SEC); | 999 | do_div(ret, USEC_PER_SEC); |
995 | return ret; | 1000 | return ret; |
996 | } | 1001 | } |
997 | 1002 | ||
998 | static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) | 1003 | static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) |
999 | { | 1004 | { |
1000 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1005 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1001 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1006 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
1002 | &kvm->arch.virtual_tsc_shift, | 1007 | &vcpu->arch.tsc_catchup_shift, |
1003 | &kvm->arch.virtual_tsc_mult); | 1008 | &vcpu->arch.tsc_catchup_mult); |
1004 | kvm->arch.virtual_tsc_khz = this_tsc_khz; | ||
1005 | } | 1009 | } |
1006 | 1010 | ||
1007 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | 1011 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) |
1008 | { | 1012 | { |
1009 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, | 1013 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, |
1010 | vcpu->kvm->arch.virtual_tsc_mult, | 1014 | vcpu->arch.tsc_catchup_mult, |
1011 | vcpu->kvm->arch.virtual_tsc_shift); | 1015 | vcpu->arch.tsc_catchup_shift); |
1012 | tsc += vcpu->arch.last_tsc_write; | 1016 | tsc += vcpu->arch.last_tsc_write; |
1013 | return tsc; | 1017 | return tsc; |
1014 | } | 1018 | } |
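vcpu_tsc_khz() prefers the per-vcpu virtual_tsc_khz when the new TSC-scaling support has set one, falling back to the host's cpu_tsc_khz otherwise, and nsec_to_cycles() now scales by that per-vcpu rate. The arithmetic is simply cycles = nsec * tsc_khz / 1,000,000 (USEC_PER_SEC); a tiny standalone check, illustrative only and without overflow handling:

#include <inttypes.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000ULL

/*
 * Same arithmetic as nsec_to_cycles(): a TSC running at tsc_khz kilohertz
 * advances tsc_khz cycles per millisecond, i.e. nsec * tsc_khz / 10^6
 * cycles in nsec nanoseconds.
 */
static uint64_t nsec_to_cycles(uint64_t nsec, uint64_t tsc_khz)
{
    return nsec * tsc_khz / USEC_PER_SEC;
}

int main(void)
{
    /* 5 seconds on a 2.5 GHz (2,500,000 kHz) TSC -> 12.5 billion cycles,
     * the window kvm_write_tsc() uses to decide whether to match offsets. */
    uint64_t cycles = nsec_to_cycles(5ULL * 1000000000ULL, 2500000ULL);
    printf("%" PRIu64 " cycles\n", cycles);   /* prints 12500000000 */
    return 0;
}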
@@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1021 | s64 sdiff; | 1025 | s64 sdiff; |
1022 | 1026 | ||
1023 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1027 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1024 | offset = data - native_read_tsc(); | 1028 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); |
1025 | ns = get_kernel_ns(); | 1029 | ns = get_kernel_ns(); |
1026 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1030 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1027 | sdiff = data - kvm->arch.last_tsc_write; | 1031 | sdiff = data - kvm->arch.last_tsc_write; |
@@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1037 | * In that case, for a reliable TSC, we can match TSC offsets, | 1041 | * In that case, for a reliable TSC, we can match TSC offsets, |
1038 | * or make a best guess using elapsed value. | 1042 | * or make a best guess using elapsed value. |
1039 | */ | 1043 | */ |
1040 | if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && | 1044 | if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && |
1041 | elapsed < 5ULL * NSEC_PER_SEC) { | 1045 | elapsed < 5ULL * NSEC_PER_SEC) { |
1042 | if (!check_tsc_unstable()) { | 1046 | if (!check_tsc_unstable()) { |
1043 | offset = kvm->arch.last_tsc_offset; | 1047 | offset = kvm->arch.last_tsc_offset; |
1044 | pr_debug("kvm: matched tsc offset for %llu\n", data); | 1048 | pr_debug("kvm: matched tsc offset for %llu\n", data); |
1045 | } else { | 1049 | } else { |
1046 | u64 delta = nsec_to_cycles(elapsed); | 1050 | u64 delta = nsec_to_cycles(vcpu, elapsed); |
1047 | offset += delta; | 1051 | offset += delta; |
1048 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | 1052 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); |
1049 | } | 1053 | } |
@@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1075 | local_irq_save(flags); | 1079 | local_irq_save(flags); |
1076 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1080 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); |
1077 | kernel_ns = get_kernel_ns(); | 1081 | kernel_ns = get_kernel_ns(); |
1078 | this_tsc_khz = __this_cpu_read(cpu_tsc_khz); | 1082 | this_tsc_khz = vcpu_tsc_khz(v); |
1079 | |||
1080 | if (unlikely(this_tsc_khz == 0)) { | 1083 | if (unlikely(this_tsc_khz == 0)) { |
1081 | local_irq_restore(flags); | 1084 | local_irq_restore(flags); |
1082 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | 1085 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
@@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1993 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1996 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1994 | case KVM_CAP_XSAVE: | 1997 | case KVM_CAP_XSAVE: |
1995 | case KVM_CAP_ASYNC_PF: | 1998 | case KVM_CAP_ASYNC_PF: |
1999 | case KVM_CAP_GET_TSC_KHZ: | ||
1996 | r = 1; | 2000 | r = 1; |
1997 | break; | 2001 | break; |
1998 | case KVM_CAP_COALESCED_MMIO: | 2002 | case KVM_CAP_COALESCED_MMIO: |
@@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2019 | case KVM_CAP_XCRS: | 2023 | case KVM_CAP_XCRS: |
2020 | r = cpu_has_xsave; | 2024 | r = cpu_has_xsave; |
2021 | break; | 2025 | break; |
2026 | case KVM_CAP_TSC_CONTROL: | ||
2027 | r = kvm_has_tsc_control; | ||
2028 | break; | ||
2022 | default: | 2029 | default: |
2023 | r = 0; | 2030 | r = 0; |
2024 | break; | 2031 | break; |
@@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2120 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 2127 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
2121 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | 2128 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { |
2122 | /* Make sure TSC doesn't go backwards */ | 2129 | /* Make sure TSC doesn't go backwards */ |
2123 | s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : | 2130 | s64 tsc_delta; |
2124 | native_read_tsc() - vcpu->arch.last_host_tsc; | 2131 | u64 tsc; |
2132 | |||
2133 | kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); | ||
2134 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | ||
2135 | tsc - vcpu->arch.last_guest_tsc; | ||
2136 | |||
2125 | if (tsc_delta < 0) | 2137 | if (tsc_delta < 0) |
2126 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2138 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2127 | if (check_tsc_unstable()) { | 2139 | if (check_tsc_unstable()) { |
@@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2139 | { | 2151 | { |
2140 | kvm_x86_ops->vcpu_put(vcpu); | 2152 | kvm_x86_ops->vcpu_put(vcpu); |
2141 | kvm_put_guest_fpu(vcpu); | 2153 | kvm_put_guest_fpu(vcpu); |
2142 | vcpu->arch.last_host_tsc = native_read_tsc(); | 2154 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); |
2143 | } | 2155 | } |
2144 | 2156 | ||
2145 | static int is_efer_nx(void) | 2157 | static int is_efer_nx(void) |
@@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2324 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | 2336 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | |
2325 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | 2337 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
2326 | 2338 | ||
2339 | /* cpuid 0xC0000001.edx */ | ||
2340 | const u32 kvm_supported_word5_x86_features = | ||
2341 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
2342 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
2343 | F(PMM) | F(PMM_EN); | ||
2344 | |||
2327 | /* all calls to cpuid_count() should be made on the same cpu */ | 2345 | /* all calls to cpuid_count() should be made on the same cpu */ |
2328 | get_cpu(); | 2346 | get_cpu(); |
2329 | do_cpuid_1_ent(entry, function, index); | 2347 | do_cpuid_1_ent(entry, function, index); |
@@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2418 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | 2436 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | |
2419 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | 2437 | (1 << KVM_FEATURE_NOP_IO_DELAY) | |
2420 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 2438 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
2439 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
2421 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 2440 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
2422 | entry->ebx = 0; | 2441 | entry->ebx = 0; |
2423 | entry->ecx = 0; | 2442 | entry->ecx = 0; |
@@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2432 | entry->ecx &= kvm_supported_word6_x86_features; | 2451 | entry->ecx &= kvm_supported_word6_x86_features; |
2433 | cpuid_mask(&entry->ecx, 6); | 2452 | cpuid_mask(&entry->ecx, 6); |
2434 | break; | 2453 | break; |
2454 | /*Add support for Centaur's CPUID instruction*/ | ||
2455 | case 0xC0000000: | ||
2456 | /*Just support up to 0xC0000004 now*/ | ||
2457 | entry->eax = min(entry->eax, 0xC0000004); | ||
2458 | break; | ||
2459 | case 0xC0000001: | ||
2460 | entry->edx &= kvm_supported_word5_x86_features; | ||
2461 | cpuid_mask(&entry->edx, 5); | ||
2462 | break; | ||
2463 | case 0xC0000002: | ||
2464 | case 0xC0000003: | ||
2465 | case 0xC0000004: | ||
2466 | /*Now nothing to do, reserved for the future*/ | ||
2467 | break; | ||
2435 | } | 2468 | } |
2436 | 2469 | ||
2437 | kvm_x86_ops->set_supported_cpuid(function, entry); | 2470 | kvm_x86_ops->set_supported_cpuid(function, entry); |
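do_cpuid_ent() now recognizes the Centaur CPUID range: leaf 0xC0000000 reports the highest implemented Centaur leaf (clamped to 0xC0000004), and leaf 0xC0000001 EDX carries the VIA PadLock feature bits that KVM is willing to expose. For reference, probing the same range from host userspace looks roughly like this (uses GCC's <cpuid.h>, x86 only; feature-bit decoding omitted):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 0xC0000000: EAX holds the highest implemented Centaur leaf. */
    __cpuid(0xC0000000, eax, ebx, ecx, edx);
    if (eax < 0xC0000001) {
        puts("no Centaur/PadLock CPUID leaves");
        return 0;
    }

    /* Leaf 0xC0000001: EDX carries the PadLock feature flags
     * (XSTORE, XCRYPT, ACE2, PHE, PMM and their *_EN bits). */
    __cpuid(0xC0000001, eax, ebx, ecx, edx);
    printf("Centaur feature EDX: 0x%08x\n", edx);
    return 0;
}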
@@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
2478 | if (nent >= cpuid->nent) | 2511 | if (nent >= cpuid->nent) |
2479 | goto out_free; | 2512 | goto out_free; |
2480 | 2513 | ||
2514 | /* Add support for Centaur's CPUID instruction. */ | ||
2515 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { | ||
2516 | do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, | ||
2517 | &nent, cpuid->nent); | ||
2518 | |||
2519 | r = -E2BIG; | ||
2520 | if (nent >= cpuid->nent) | ||
2521 | goto out_free; | ||
2522 | |||
2523 | limit = cpuid_entries[nent - 1].eax; | ||
2524 | for (func = 0xC0000001; | ||
2525 | func <= limit && nent < cpuid->nent; ++func) | ||
2526 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2527 | &nent, cpuid->nent); | ||
2528 | |||
2529 | r = -E2BIG; | ||
2530 | if (nent >= cpuid->nent) | ||
2531 | goto out_free; | ||
2532 | } | ||
2533 | |||
2481 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | 2534 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, |
2482 | cpuid->nent); | 2535 | cpuid->nent); |
2483 | 2536 | ||
@@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3046 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | 3099 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); |
3047 | break; | 3100 | break; |
3048 | } | 3101 | } |
3102 | case KVM_SET_TSC_KHZ: { | ||
3103 | u32 user_tsc_khz; | ||
3104 | |||
3105 | r = -EINVAL; | ||
3106 | if (!kvm_has_tsc_control) | ||
3107 | break; | ||
3108 | |||
3109 | user_tsc_khz = (u32)arg; | ||
3110 | |||
3111 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) | ||
3112 | goto out; | ||
3113 | |||
3114 | kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); | ||
3115 | |||
3116 | r = 0; | ||
3117 | goto out; | ||
3118 | } | ||
3119 | case KVM_GET_TSC_KHZ: { | ||
3120 | r = -EIO; | ||
3121 | if (check_tsc_unstable()) | ||
3122 | goto out; | ||
3123 | |||
3124 | r = vcpu_tsc_khz(vcpu); | ||
3125 | |||
3126 | goto out; | ||
3127 | } | ||
3049 | default: | 3128 | default: |
3050 | r = -EINVAL; | 3129 | r = -EINVAL; |
3051 | } | 3130 | } |
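The new ioctl cases wire up KVM_SET_TSC_KHZ (accepted only when kvm_has_tsc_control, and the requested rate must stay below kvm_max_guest_tsc_khz) and KVM_GET_TSC_KHZ (which fails with -EIO on an unstable host TSC). A rough userspace usage sketch -- error handling abbreviated, and the 1500000 kHz value is just an example:

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    int kvm_fd  = open("/dev/kvm", O_RDWR);
    int vm_fd   = ioctl(kvm_fd, KVM_CREATE_VM, 0);
    int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);

    if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL) > 0) {
        /* Request a 1.5 GHz guest TSC; the argument is the rate in kHz,
         * passed by value, and must stay below kvm_max_guest_tsc_khz. */
        if (ioctl(vcpu_fd, KVM_SET_TSC_KHZ, 1500000) < 0)
            perror("KVM_SET_TSC_KHZ");
    }

    int khz = ioctl(vcpu_fd, KVM_GET_TSC_KHZ, 0);
    if (khz < 0)
        perror("KVM_GET_TSC_KHZ");   /* -EIO if the host TSC is unstable */
    else
        printf("guest TSC runs at %d kHz\n", khz);

    close(vcpu_fd);
    close(vm_fd);
    close(kvm_fd);
    return 0;
}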
@@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void) | |||
3595 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | 3674 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, |
3596 | const void *v) | 3675 | const void *v) |
3597 | { | 3676 | { |
3598 | if (vcpu->arch.apic && | 3677 | int handled = 0; |
3599 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3678 | int n; |
3600 | return 0; | ||
3601 | 3679 | ||
3602 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3680 | do { |
3681 | n = min(len, 8); | ||
3682 | if (!(vcpu->arch.apic && | ||
3683 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) | ||
3684 | && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | ||
3685 | break; | ||
3686 | handled += n; | ||
3687 | addr += n; | ||
3688 | len -= n; | ||
3689 | v += n; | ||
3690 | } while (len); | ||
3691 | |||
3692 | return handled; | ||
3603 | } | 3693 | } |
3604 | 3694 | ||
3605 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3695 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
3606 | { | 3696 | { |
3607 | if (vcpu->arch.apic && | 3697 | int handled = 0; |
3608 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3698 | int n; |
3609 | return 0; | 3699 | |
3700 | do { | ||
3701 | n = min(len, 8); | ||
3702 | if (!(vcpu->arch.apic && | ||
3703 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) | ||
3704 | && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | ||
3705 | break; | ||
3706 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); | ||
3707 | handled += n; | ||
3708 | addr += n; | ||
3709 | len -= n; | ||
3710 | v += n; | ||
3711 | } while (len); | ||
3610 | 3712 | ||
3611 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3713 | return handled; |
3612 | } | 3714 | } |
3613 | 3715 | ||
3614 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 3716 | static void kvm_set_segment(struct kvm_vcpu *vcpu, |
@@ -3703,37 +3805,43 @@ out: | |||
3703 | } | 3805 | } |
3704 | 3806 | ||
3705 | /* used for instruction fetching */ | 3807 | /* used for instruction fetching */ |
3706 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3808 | static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, |
3707 | struct kvm_vcpu *vcpu, | 3809 | gva_t addr, void *val, unsigned int bytes, |
3708 | struct x86_exception *exception) | 3810 | struct x86_exception *exception) |
3709 | { | 3811 | { |
3812 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3710 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3813 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3814 | |||
3711 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | 3815 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, |
3712 | access | PFERR_FETCH_MASK, | 3816 | access | PFERR_FETCH_MASK, |
3713 | exception); | 3817 | exception); |
3714 | } | 3818 | } |
3715 | 3819 | ||
3716 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3820 | static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, |
3717 | struct kvm_vcpu *vcpu, | 3821 | gva_t addr, void *val, unsigned int bytes, |
3718 | struct x86_exception *exception) | 3822 | struct x86_exception *exception) |
3719 | { | 3823 | { |
3824 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3720 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3825 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3826 | |||
3721 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | 3827 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, |
3722 | exception); | 3828 | exception); |
3723 | } | 3829 | } |
3724 | 3830 | ||
3725 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | 3831 | static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3726 | struct kvm_vcpu *vcpu, | 3832 | gva_t addr, void *val, unsigned int bytes, |
3727 | struct x86_exception *exception) | 3833 | struct x86_exception *exception) |
3728 | { | 3834 | { |
3835 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3729 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); | 3836 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
3730 | } | 3837 | } |
3731 | 3838 | ||
3732 | static int kvm_write_guest_virt_system(gva_t addr, void *val, | 3839 | static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3840 | gva_t addr, void *val, | ||
3733 | unsigned int bytes, | 3841 | unsigned int bytes, |
3734 | struct kvm_vcpu *vcpu, | ||
3735 | struct x86_exception *exception) | 3842 | struct x86_exception *exception) |
3736 | { | 3843 | { |
3844 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3737 | void *data = val; | 3845 | void *data = val; |
3738 | int r = X86EMUL_CONTINUE; | 3846 | int r = X86EMUL_CONTINUE; |
3739 | 3847 | ||
@@ -3761,13 +3869,15 @@ out: | |||
3761 | return r; | 3869 | return r; |
3762 | } | 3870 | } |
3763 | 3871 | ||
3764 | static int emulator_read_emulated(unsigned long addr, | 3872 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, |
3873 | unsigned long addr, | ||
3765 | void *val, | 3874 | void *val, |
3766 | unsigned int bytes, | 3875 | unsigned int bytes, |
3767 | struct x86_exception *exception, | 3876 | struct x86_exception *exception) |
3768 | struct kvm_vcpu *vcpu) | ||
3769 | { | 3877 | { |
3878 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3770 | gpa_t gpa; | 3879 | gpa_t gpa; |
3880 | int handled; | ||
3771 | 3881 | ||
3772 | if (vcpu->mmio_read_completed) { | 3882 | if (vcpu->mmio_read_completed) { |
3773 | memcpy(val, vcpu->mmio_data, bytes); | 3883 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
3786 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3896 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3787 | goto mmio; | 3897 | goto mmio; |
3788 | 3898 | ||
3789 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) | 3899 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) |
3790 | == X86EMUL_CONTINUE) | 3900 | == X86EMUL_CONTINUE) |
3791 | return X86EMUL_CONTINUE; | 3901 | return X86EMUL_CONTINUE; |
3792 | 3902 | ||
@@ -3794,18 +3904,24 @@ mmio: | |||
3794 | /* | 3904 | /* |
3795 | * Is this MMIO handled locally? | 3905 | * Is this MMIO handled locally? |
3796 | */ | 3906 | */ |
3797 | if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { | 3907 | handled = vcpu_mmio_read(vcpu, gpa, bytes, val); |
3798 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); | 3908 | |
3909 | if (handled == bytes) | ||
3799 | return X86EMUL_CONTINUE; | 3910 | return X86EMUL_CONTINUE; |
3800 | } | 3911 | |
3912 | gpa += handled; | ||
3913 | bytes -= handled; | ||
3914 | val += handled; | ||
3801 | 3915 | ||
3802 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 3916 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
3803 | 3917 | ||
3804 | vcpu->mmio_needed = 1; | 3918 | vcpu->mmio_needed = 1; |
3805 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3919 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3806 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 3920 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3807 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; | 3921 | vcpu->mmio_size = bytes; |
3922 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3808 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | 3923 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; |
3924 | vcpu->mmio_index = 0; | ||
3809 | 3925 | ||
3810 | return X86EMUL_IO_NEEDED; | 3926 | return X86EMUL_IO_NEEDED; |
3811 | } | 3927 | } |
@@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
3829 | struct kvm_vcpu *vcpu) | 3945 | struct kvm_vcpu *vcpu) |
3830 | { | 3946 | { |
3831 | gpa_t gpa; | 3947 | gpa_t gpa; |
3948 | int handled; | ||
3832 | 3949 | ||
3833 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); | 3950 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); |
3834 | 3951 | ||
@@ -3847,25 +3964,35 @@ mmio: | |||
3847 | /* | 3964 | /* |
3848 | * Is this MMIO handled locally? | 3965 | * Is this MMIO handled locally? |
3849 | */ | 3966 | */ |
3850 | if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) | 3967 | handled = vcpu_mmio_write(vcpu, gpa, bytes, val); |
3968 | if (handled == bytes) | ||
3851 | return X86EMUL_CONTINUE; | 3969 | return X86EMUL_CONTINUE; |
3852 | 3970 | ||
3971 | gpa += handled; | ||
3972 | bytes -= handled; | ||
3973 | val += handled; | ||
3974 | |||
3853 | vcpu->mmio_needed = 1; | 3975 | vcpu->mmio_needed = 1; |
3976 | memcpy(vcpu->mmio_data, val, bytes); | ||
3854 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3977 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3855 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 3978 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3856 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; | 3979 | vcpu->mmio_size = bytes; |
3980 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3857 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; | 3981 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; |
3858 | memcpy(vcpu->run->mmio.data, val, bytes); | 3982 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); |
3983 | vcpu->mmio_index = 0; | ||
3859 | 3984 | ||
3860 | return X86EMUL_CONTINUE; | 3985 | return X86EMUL_CONTINUE; |
3861 | } | 3986 | } |
3862 | 3987 | ||
3863 | int emulator_write_emulated(unsigned long addr, | 3988 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, |
3989 | unsigned long addr, | ||
3864 | const void *val, | 3990 | const void *val, |
3865 | unsigned int bytes, | 3991 | unsigned int bytes, |
3866 | struct x86_exception *exception, | 3992 | struct x86_exception *exception) |
3867 | struct kvm_vcpu *vcpu) | ||
3868 | { | 3993 | { |
3994 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3995 | |||
3869 | /* Crossing a page boundary? */ | 3996 | /* Crossing a page boundary? */ |
3870 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3997 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
3871 | int rc, now; | 3998 | int rc, now; |
@@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr, | |||
3893 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | 4020 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) |
3894 | #endif | 4021 | #endif |
3895 | 4022 | ||
3896 | static int emulator_cmpxchg_emulated(unsigned long addr, | 4023 | static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, |
4024 | unsigned long addr, | ||
3897 | const void *old, | 4025 | const void *old, |
3898 | const void *new, | 4026 | const void *new, |
3899 | unsigned int bytes, | 4027 | unsigned int bytes, |
3900 | struct x86_exception *exception, | 4028 | struct x86_exception *exception) |
3901 | struct kvm_vcpu *vcpu) | ||
3902 | { | 4029 | { |
4030 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3903 | gpa_t gpa; | 4031 | gpa_t gpa; |
3904 | struct page *page; | 4032 | struct page *page; |
3905 | char *kaddr; | 4033 | char *kaddr; |
@@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3955 | emul_write: | 4083 | emul_write: |
3956 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 4084 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3957 | 4085 | ||
3958 | return emulator_write_emulated(addr, new, bytes, exception, vcpu); | 4086 | return emulator_write_emulated(ctxt, addr, new, bytes, exception); |
3959 | } | 4087 | } |
3960 | 4088 | ||
3961 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 4089 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
3974 | } | 4102 | } |
3975 | 4103 | ||
3976 | 4104 | ||
3977 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | 4105 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
3978 | unsigned int count, struct kvm_vcpu *vcpu) | 4106 | int size, unsigned short port, void *val, |
4107 | unsigned int count) | ||
3979 | { | 4108 | { |
4109 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4110 | |||
3980 | if (vcpu->arch.pio.count) | 4111 | if (vcpu->arch.pio.count) |
3981 | goto data_avail; | 4112 | goto data_avail; |
3982 | 4113 | ||
@@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | |||
4004 | return 0; | 4135 | return 0; |
4005 | } | 4136 | } |
4006 | 4137 | ||
4007 | static int emulator_pio_out_emulated(int size, unsigned short port, | 4138 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, |
4008 | const void *val, unsigned int count, | 4139 | int size, unsigned short port, |
4009 | struct kvm_vcpu *vcpu) | 4140 | const void *val, unsigned int count) |
4010 | { | 4141 | { |
4142 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4143 | |||
4011 | trace_kvm_pio(1, port, size, count); | 4144 | trace_kvm_pio(1, port, size, count); |
4012 | 4145 | ||
4013 | vcpu->arch.pio.port = port; | 4146 | vcpu->arch.pio.port = port; |
@@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
4037 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 4170 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
4038 | } | 4171 | } |
4039 | 4172 | ||
4040 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 4173 | static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) |
4041 | { | 4174 | { |
4042 | kvm_mmu_invlpg(vcpu, address); | 4175 | kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); |
4043 | return X86EMUL_CONTINUE; | ||
4044 | } | 4176 | } |
4045 | 4177 | ||
4046 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | 4178 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) |
@@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | |||
4062 | } | 4194 | } |
4063 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | 4195 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); |
4064 | 4196 | ||
4065 | int emulate_clts(struct kvm_vcpu *vcpu) | 4197 | static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) |
4066 | { | 4198 | { |
4067 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 4199 | kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); |
4068 | kvm_x86_ops->fpu_activate(vcpu); | ||
4069 | return X86EMUL_CONTINUE; | ||
4070 | } | 4200 | } |
4071 | 4201 | ||
4072 | int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) | 4202 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
4073 | { | 4203 | { |
4074 | return _kvm_get_dr(vcpu, dr, dest); | 4204 | return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); |
4075 | } | 4205 | } |
4076 | 4206 | ||
4077 | int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) | 4207 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
4078 | { | 4208 | { |
4079 | 4209 | ||
4080 | return __kvm_set_dr(vcpu, dr, value); | 4210 | return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); |
4081 | } | 4211 | } |
4082 | 4212 | ||
4083 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 4213 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) | |||
4085 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | 4215 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; |
4086 | } | 4216 | } |
4087 | 4217 | ||
4088 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | 4218 | static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) |
4089 | { | 4219 | { |
4220 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4090 | unsigned long value; | 4221 | unsigned long value; |
4091 | 4222 | ||
4092 | switch (cr) { | 4223 | switch (cr) { |
@@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
4113 | return value; | 4244 | return value; |
4114 | } | 4245 | } |
4115 | 4246 | ||
4116 | static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | 4247 | static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) |
4117 | { | 4248 | { |
4249 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4118 | int res = 0; | 4250 | int res = 0; |
4119 | 4251 | ||
4120 | switch (cr) { | 4252 | switch (cr) { |
@@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | |||
4141 | return res; | 4273 | return res; |
4142 | } | 4274 | } |
4143 | 4275 | ||
4144 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | 4276 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) |
4277 | { | ||
4278 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); | ||
4279 | } | ||
4280 | |||
4281 | static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) | ||
4282 | { | ||
4283 | kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); | ||
4284 | } | ||
4285 | |||
4286 | static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) | ||
4145 | { | 4287 | { |
4146 | return kvm_x86_ops->get_cpl(vcpu); | 4288 | kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); |
4147 | } | 4289 | } |
4148 | 4290 | ||
4149 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | 4291 | static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
4150 | { | 4292 | { |
4151 | kvm_x86_ops->get_gdt(vcpu, dt); | 4293 | kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); |
4152 | } | 4294 | } |
4153 | 4295 | ||
4154 | static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | 4296 | static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
4155 | { | 4297 | { |
4156 | kvm_x86_ops->get_idt(vcpu, dt); | 4298 | kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); |
4157 | } | 4299 | } |
4158 | 4300 | ||
4159 | static unsigned long emulator_get_cached_segment_base(int seg, | 4301 | static unsigned long emulator_get_cached_segment_base( |
4160 | struct kvm_vcpu *vcpu) | 4302 | struct x86_emulate_ctxt *ctxt, int seg) |
4161 | { | 4303 | { |
4162 | return get_segment_base(vcpu, seg); | 4304 | return get_segment_base(emul_to_vcpu(ctxt), seg); |
4163 | } | 4305 | } |
4164 | 4306 | ||
4165 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, | 4307 | static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, |
4166 | int seg, struct kvm_vcpu *vcpu) | 4308 | struct desc_struct *desc, u32 *base3, |
4309 | int seg) | ||
4167 | { | 4310 | { |
4168 | struct kvm_segment var; | 4311 | struct kvm_segment var; |
4169 | 4312 | ||
4170 | kvm_get_segment(vcpu, &var, seg); | 4313 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); |
4314 | *selector = var.selector; | ||
4171 | 4315 | ||
4172 | if (var.unusable) | 4316 | if (var.unusable) |
4173 | return false; | 4317 | return false; |
@@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, | |||
4192 | return true; | 4336 | return true; |
4193 | } | 4337 | } |
4194 | 4338 | ||
4195 | static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, | 4339 | static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, |
4196 | int seg, struct kvm_vcpu *vcpu) | 4340 | struct desc_struct *desc, u32 base3, |
4341 | int seg) | ||
4197 | { | 4342 | { |
4343 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4198 | struct kvm_segment var; | 4344 | struct kvm_segment var; |
4199 | 4345 | ||
4200 | /* needed to preserve selector */ | 4346 | var.selector = selector; |
4201 | kvm_get_segment(vcpu, &var, seg); | ||
4202 | |||
4203 | var.base = get_desc_base(desc); | 4347 | var.base = get_desc_base(desc); |
4204 | #ifdef CONFIG_X86_64 | 4348 | #ifdef CONFIG_X86_64 |
4205 | var.base |= ((u64)base3) << 32; | 4349 | var.base |= ((u64)base3) << 32; |
@@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, | |||
4223 | return; | 4367 | return; |
4224 | } | 4368 | } |
4225 | 4369 | ||
4226 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | 4370 | static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, |
4371 | u32 msr_index, u64 *pdata) | ||
4227 | { | 4372 | { |
4228 | struct kvm_segment kvm_seg; | 4373 | return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); |
4374 | } | ||
4229 | 4375 | ||
4230 | kvm_get_segment(vcpu, &kvm_seg, seg); | 4376 | static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, |
4231 | return kvm_seg.selector; | 4377 | u32 msr_index, u64 data) |
4378 | { | ||
4379 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); | ||
4232 | } | 4380 | } |
4233 | 4381 | ||
4234 | static void emulator_set_segment_selector(u16 sel, int seg, | 4382 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) |
4235 | struct kvm_vcpu *vcpu) | ||
4236 | { | 4383 | { |
4237 | struct kvm_segment kvm_seg; | 4384 | emul_to_vcpu(ctxt)->arch.halt_request = 1; |
4385 | } | ||
4238 | 4386 | ||
4239 | kvm_get_segment(vcpu, &kvm_seg, seg); | 4387 | static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) |
4240 | kvm_seg.selector = sel; | 4388 | { |
4241 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4389 | preempt_disable(); |
4390 | kvm_load_guest_fpu(emul_to_vcpu(ctxt)); | ||
4391 | /* | ||
4392 | * CR0.TS may reference the host fpu state, not the guest fpu state, | ||
4393 | * so it may be clear at this point. | ||
4394 | */ | ||
4395 | clts(); | ||
4396 | } | ||
4397 | |||
4398 | static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) | ||
4399 | { | ||
4400 | preempt_enable(); | ||
4401 | } | ||
4402 | |||
4403 | static int emulator_intercept(struct x86_emulate_ctxt *ctxt, | ||
4404 | struct x86_instruction_info *info, | ||
4405 | enum x86_intercept_stage stage) | ||
4406 | { | ||
4407 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | ||
4242 | } | 4408 | } |
4243 | 4409 | ||
4244 | static struct x86_emulate_ops emulate_ops = { | 4410 | static struct x86_emulate_ops emulate_ops = { |
@@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = { | |||
4248 | .read_emulated = emulator_read_emulated, | 4414 | .read_emulated = emulator_read_emulated, |
4249 | .write_emulated = emulator_write_emulated, | 4415 | .write_emulated = emulator_write_emulated, |
4250 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 4416 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
4417 | .invlpg = emulator_invlpg, | ||
4251 | .pio_in_emulated = emulator_pio_in_emulated, | 4418 | .pio_in_emulated = emulator_pio_in_emulated, |
4252 | .pio_out_emulated = emulator_pio_out_emulated, | 4419 | .pio_out_emulated = emulator_pio_out_emulated, |
4253 | .get_cached_descriptor = emulator_get_cached_descriptor, | 4420 | .get_segment = emulator_get_segment, |
4254 | .set_cached_descriptor = emulator_set_cached_descriptor, | 4421 | .set_segment = emulator_set_segment, |
4255 | .get_segment_selector = emulator_get_segment_selector, | ||
4256 | .set_segment_selector = emulator_set_segment_selector, | ||
4257 | .get_cached_segment_base = emulator_get_cached_segment_base, | 4422 | .get_cached_segment_base = emulator_get_cached_segment_base, |
4258 | .get_gdt = emulator_get_gdt, | 4423 | .get_gdt = emulator_get_gdt, |
4259 | .get_idt = emulator_get_idt, | 4424 | .get_idt = emulator_get_idt, |
4425 | .set_gdt = emulator_set_gdt, | ||
4426 | .set_idt = emulator_set_idt, | ||
4260 | .get_cr = emulator_get_cr, | 4427 | .get_cr = emulator_get_cr, |
4261 | .set_cr = emulator_set_cr, | 4428 | .set_cr = emulator_set_cr, |
4262 | .cpl = emulator_get_cpl, | 4429 | .cpl = emulator_get_cpl, |
4263 | .get_dr = emulator_get_dr, | 4430 | .get_dr = emulator_get_dr, |
4264 | .set_dr = emulator_set_dr, | 4431 | .set_dr = emulator_set_dr, |
4265 | .set_msr = kvm_set_msr, | 4432 | .set_msr = emulator_set_msr, |
4266 | .get_msr = kvm_get_msr, | 4433 | .get_msr = emulator_get_msr, |
4434 | .halt = emulator_halt, | ||
4435 | .wbinvd = emulator_wbinvd, | ||
4436 | .fix_hypercall = emulator_fix_hypercall, | ||
4437 | .get_fpu = emulator_get_fpu, | ||
4438 | .put_fpu = emulator_put_fpu, | ||
4439 | .intercept = emulator_intercept, | ||
4267 | }; | 4440 | }; |
4268 | 4441 | ||
4269 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 4442 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
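The emulate_ops table above is the net effect of the refactoring: every callback now receives the x86_emulate_ctxt, so emulate.c never needs to know about struct kvm_vcpu. A minimal, self-contained example of this ops-table-plus-context shape (all names invented for illustration):

#include <stdio.h>

struct ctxt;

struct ops {
    unsigned long (*get_cr)(struct ctxt *ctxt, int cr);
    void          (*halt)(struct ctxt *ctxt);
};

struct ctxt {
    const struct ops *ops;
    unsigned long cr[5];
    int halted;
};

static unsigned long my_get_cr(struct ctxt *ctxt, int cr) { return ctxt->cr[cr]; }
static void my_halt(struct ctxt *ctxt) { ctxt->halted = 1; }

static const struct ops my_ops = {
    .get_cr = my_get_cr,
    .halt   = my_halt,
};

/* "Emulator core": knows only the context and its ops, nothing else. */
static void emulate_hlt(struct ctxt *ctxt)
{
    if (ctxt->ops->get_cr(ctxt, 0) & 1)   /* e.g. only in protected mode */
        ctxt->ops->halt(ctxt);
}

int main(void)
{
    struct ctxt c = { .ops = &my_ops, .cr = { 1 } };
    emulate_hlt(&c);
    printf("halted = %d\n", c.halted);    /* prints "halted = 1" */
    return 0;
}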
@@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4305 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4478 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4306 | int cs_db, cs_l; | 4479 | int cs_db, cs_l; |
4307 | 4480 | ||
4481 | /* | ||
4482 | * TODO: fix emulate.c to use guest_read/write_register | ||
4483 | * instead of direct ->regs accesses, can save hundred cycles | ||
4484 | * on Intel for instructions that don't read/change RSP, for | ||
4485 | * for example. | ||
4486 | */ | ||
4308 | cache_all_regs(vcpu); | 4487 | cache_all_regs(vcpu); |
4309 | 4488 | ||
4310 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 4489 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4311 | 4490 | ||
4312 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 4491 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
4313 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | ||
4314 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | 4492 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
4315 | vcpu->arch.emulate_ctxt.mode = | 4493 | vcpu->arch.emulate_ctxt.mode = |
4316 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 4494 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
@@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4318 | ? X86EMUL_MODE_VM86 : cs_l | 4496 | ? X86EMUL_MODE_VM86 : cs_l |
4319 | ? X86EMUL_MODE_PROT64 : cs_db | 4497 | ? X86EMUL_MODE_PROT64 : cs_db |
4320 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 4498 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
4499 | vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); | ||
4321 | memset(c, 0, sizeof(struct decode_cache)); | 4500 | memset(c, 0, sizeof(struct decode_cache)); |
4322 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | 4501 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); |
4502 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | ||
4323 | } | 4503 | } |
4324 | 4504 | ||
4325 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) | 4505 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) |
4326 | { | 4506 | { |
4327 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4507 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4328 | int ret; | 4508 | int ret; |
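In the init_emulate_ctxt() hunks above, the emulation mode is picked by a nested conditional that is compact but hard to scan. Read as ordinary if/else it is equivalent to the following sketch (illustrative only; the middle condition, elided between the two hunks, is assumed to test EFLAGS.VM, and "mode" stands in for vcpu->arch.emulate_ctxt.mode):

if (!is_protmode(vcpu))
	mode = X86EMUL_MODE_REAL;		/* CR0.PE clear */
else if (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
	mode = X86EMUL_MODE_VM86;		/* virtual-8086 mode */
else if (cs_l)
	mode = X86EMUL_MODE_PROT64;		/* 64-bit code segment */
else if (cs_db)
	mode = X86EMUL_MODE_PROT32;
else
	mode = X86EMUL_MODE_PROT16;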
@@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) | |||
4331 | 4511 | ||
4332 | vcpu->arch.emulate_ctxt.decode.op_bytes = 2; | 4512 | vcpu->arch.emulate_ctxt.decode.op_bytes = 2; |
4333 | vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; | 4513 | vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; |
4334 | vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; | 4514 | vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + |
4515 | inc_eip; | ||
4335 | ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); | 4516 | ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); |
4336 | 4517 | ||
4337 | if (ret != X86EMUL_CONTINUE) | 4518 | if (ret != X86EMUL_CONTINUE) |
@@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) | |||
4340 | vcpu->arch.emulate_ctxt.eip = c->eip; | 4521 | vcpu->arch.emulate_ctxt.eip = c->eip; |
4341 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 4522 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
4342 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 4523 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
4343 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 4524 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
4344 | 4525 | ||
4345 | if (irq == NMI_VECTOR) | 4526 | if (irq == NMI_VECTOR) |
4346 | vcpu->arch.nmi_pending = false; | 4527 | vcpu->arch.nmi_pending = false; |
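kvm_inject_realmode_interrupt() gains an inc_eip argument so a caller can bias the instruction pointer that gets saved for the real-mode interrupt. A hedged usage sketch, assuming the usual EMULATE_DONE/EMULATE_FAIL return convention; instruction_len is a placeholder the caller would supply:

/* Hardware interrupt: save the current eip unchanged. */
if (kvm_inject_realmode_interrupt(vcpu, irq, 0) != EMULATE_DONE)
	return;

/* Re-injecting a soft interrupt: skip past the instruction that raised
 * it by passing its length as the bias. */
kvm_inject_realmode_interrupt(vcpu, irq, instruction_len);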
@@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4402 | { | 4583 | { |
4403 | int r; | 4584 | int r; |
4404 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4585 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4586 | bool writeback = true; | ||
4405 | 4587 | ||
4406 | kvm_clear_exception_queue(vcpu); | 4588 | kvm_clear_exception_queue(vcpu); |
4407 | vcpu->arch.mmio_fault_cr2 = cr2; | ||
4408 | /* | ||
4409 | * TODO: fix emulate.c to use guest_read/write_register | ||
4410 | * instead of direct ->regs accesses, can save hundred cycles | ||
4411 | * on Intel for instructions that don't read/change RSP, for | ||
4412 | * for example. | ||
4413 | */ | ||
4414 | cache_all_regs(vcpu); | ||
4415 | 4589 | ||
4416 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4590 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4417 | init_emulate_ctxt(vcpu); | 4591 | init_emulate_ctxt(vcpu); |
@@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4442 | return EMULATE_DONE; | 4616 | return EMULATE_DONE; |
4443 | } | 4617 | } |
4444 | 4618 | ||
4445 | /* this is needed for vmware backdor interface to work since it | 4619 | /* this is needed for vmware backdoor interface to work since it |
4446 | changes registers values during IO operation */ | 4620 | changes registers values during IO operation */ |
4447 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | 4621 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
4622 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | ||
4623 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4624 | } | ||
4448 | 4625 | ||
4449 | restart: | 4626 | restart: |
4450 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); | 4627 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); |
4451 | 4628 | ||
4629 | if (r == EMULATION_INTERCEPTED) | ||
4630 | return EMULATE_DONE; | ||
4631 | |||
4452 | if (r == EMULATION_FAILED) { | 4632 | if (r == EMULATION_FAILED) { |
4453 | if (reexecute_instruction(vcpu, cr2)) | 4633 | if (reexecute_instruction(vcpu, cr2)) |
4454 | return EMULATE_DONE; | 4634 | return EMULATE_DONE; |
@@ -4462,21 +4642,28 @@ restart: | |||
4462 | } else if (vcpu->arch.pio.count) { | 4642 | } else if (vcpu->arch.pio.count) { |
4463 | if (!vcpu->arch.pio.in) | 4643 | if (!vcpu->arch.pio.in) |
4464 | vcpu->arch.pio.count = 0; | 4644 | vcpu->arch.pio.count = 0; |
4645 | else | ||
4646 | writeback = false; | ||
4465 | r = EMULATE_DO_MMIO; | 4647 | r = EMULATE_DO_MMIO; |
4466 | } else if (vcpu->mmio_needed) { | 4648 | } else if (vcpu->mmio_needed) { |
4467 | if (vcpu->mmio_is_write) | 4649 | if (!vcpu->mmio_is_write) |
4468 | vcpu->mmio_needed = 0; | 4650 | writeback = false; |
4469 | r = EMULATE_DO_MMIO; | 4651 | r = EMULATE_DO_MMIO; |
4470 | } else if (r == EMULATION_RESTART) | 4652 | } else if (r == EMULATION_RESTART) |
4471 | goto restart; | 4653 | goto restart; |
4472 | else | 4654 | else |
4473 | r = EMULATE_DONE; | 4655 | r = EMULATE_DONE; |
4474 | 4656 | ||
4475 | toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); | 4657 | if (writeback) { |
4476 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 4658 | toggle_interruptibility(vcpu, |
4477 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 4659 | vcpu->arch.emulate_ctxt.interruptibility); |
4478 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 4660 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
4479 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 4661 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
4662 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
4663 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
4664 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4665 | } else | ||
4666 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | ||
4480 | 4667 | ||
4481 | return r; | 4668 | return r; |
4482 | } | 4669 | } |
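The two flags introduced around the writeback path are easy to confuse; the summary below is inferred only from the call sites visible in this patch (illustrative comment, not part of the source):

/*
 * emulate_regs_need_sync_from_vcpu: vcpu->arch.regs is newer than the
 *     emulator's copy.  Set by KVM_SET_REGS, consumed just before
 *     x86_emulate_insn() resumes the instruction.
 *
 * emulate_regs_need_sync_to_vcpu: the emulator's copy is newer than
 *     vcpu->arch.regs.  Set when writeback is deferred (a PIO-in or MMIO
 *     read is still outstanding), consumed by KVM_GET_REGS or by the
 *     writeback path once emulation finally completes.
 */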
@@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); | |||
4485 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | 4672 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
4486 | { | 4673 | { |
4487 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4674 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4488 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); | 4675 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, |
4676 | size, port, &val, 1); | ||
4489 | /* do not return to emulator after return from userspace */ | 4677 | /* do not return to emulator after return from userspace */ |
4490 | vcpu->arch.pio.count = 0; | 4678 | vcpu->arch.pio.count = 0; |
4491 | return ret; | 4679 | return ret; |
@@ -4879,8 +5067,9 @@ out: | |||
4879 | } | 5067 | } |
4880 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | 5068 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); |
4881 | 5069 | ||
4882 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 5070 | int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) |
4883 | { | 5071 | { |
5072 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4884 | char instruction[3]; | 5073 | char instruction[3]; |
4885 | unsigned long rip = kvm_rip_read(vcpu); | 5074 | unsigned long rip = kvm_rip_read(vcpu); |
4886 | 5075 | ||
@@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
4893 | 5082 | ||
4894 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 5083 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
4895 | 5084 | ||
4896 | return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); | 5085 | return emulator_write_emulated(&vcpu->arch.emulate_ctxt, |
4897 | } | 5086 | rip, instruction, 3, NULL); |
4898 | |||
4899 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | ||
4900 | { | ||
4901 | struct desc_ptr dt = { limit, base }; | ||
4902 | |||
4903 | kvm_x86_ops->set_gdt(vcpu, &dt); | ||
4904 | } | ||
4905 | |||
4906 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | ||
4907 | { | ||
4908 | struct desc_ptr dt = { limit, base }; | ||
4909 | |||
4910 | kvm_x86_ops->set_idt(vcpu, &dt); | ||
4911 | } | 5087 | } |
4912 | 5088 | ||
4913 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 5089 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
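realmode_lgdt()/realmode_lidt() disappear because the emulator now loads descriptor tables through the .set_gdt/.set_idt hooks registered in the ops table above. Their replacements are not shown in this hunk; a plausible shape, assuming the same emul_to_vcpu() helper:

static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
	/* Same job as the removed realmode_lgdt(), reached via the ops table. */
	kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
}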
@@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
5170 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5346 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5171 | { | 5347 | { |
5172 | int r; | 5348 | int r; |
5349 | bool nmi_pending; | ||
5173 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5350 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5174 | vcpu->run->request_interrupt_window; | 5351 | vcpu->run->request_interrupt_window; |
5175 | 5352 | ||
@@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5207 | r = 1; | 5384 | r = 1; |
5208 | goto out; | 5385 | goto out; |
5209 | } | 5386 | } |
5210 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
5211 | vcpu->arch.nmi_pending = true; | ||
5212 | } | 5387 | } |
5213 | 5388 | ||
5214 | r = kvm_mmu_reload(vcpu); | 5389 | r = kvm_mmu_reload(vcpu); |
5215 | if (unlikely(r)) | 5390 | if (unlikely(r)) |
5216 | goto out; | 5391 | goto out; |
5217 | 5392 | ||
5393 | /* | ||
5394 | * An NMI can be injected between local nmi_pending read and | ||
5395 | * vcpu->arch.nmi_pending read inside inject_pending_event(). | ||
5396 | * But in that case, KVM_REQ_EVENT will be set, which makes | ||
5397 | * the race described above benign. | ||
5398 | */ | ||
5399 | nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); | ||
5400 | |||
5218 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5401 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
5219 | inject_pending_event(vcpu); | 5402 | inject_pending_event(vcpu); |
5220 | 5403 | ||
5221 | /* enable NMI/IRQ window open exits if needed */ | 5404 | /* enable NMI/IRQ window open exits if needed */ |
5222 | if (vcpu->arch.nmi_pending) | 5405 | if (nmi_pending) |
5223 | kvm_x86_ops->enable_nmi_window(vcpu); | 5406 | kvm_x86_ops->enable_nmi_window(vcpu); |
5224 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5407 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) |
5225 | kvm_x86_ops->enable_irq_window(vcpu); | 5408 | kvm_x86_ops->enable_irq_window(vcpu); |
The ACCESS_ONCE() snapshot above exists so that the window-enable decision and inject_pending_event() act on one consistent reading of nmi_pending; an NMI arriving in between is caught on the next iteration via KVM_REQ_EVENT, as the added comment notes.
@@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5399 | return r; | 5582 | return r; |
5400 | } | 5583 | } |
5401 | 5584 | ||
5585 | static int complete_mmio(struct kvm_vcpu *vcpu) | ||
5586 | { | ||
5587 | struct kvm_run *run = vcpu->run; | ||
5588 | int r; | ||
5589 | |||
5590 | if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) | ||
5591 | return 1; | ||
5592 | |||
5593 | if (vcpu->mmio_needed) { | ||
5594 | vcpu->mmio_needed = 0; | ||
5595 | if (!vcpu->mmio_is_write) | ||
5596 | memcpy(vcpu->mmio_data + vcpu->mmio_index, | ||
5597 | run->mmio.data, 8); | ||
5598 | vcpu->mmio_index += 8; | ||
5599 | if (vcpu->mmio_index < vcpu->mmio_size) { | ||
5600 | run->exit_reason = KVM_EXIT_MMIO; | ||
5601 | run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; | ||
5602 | memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); | ||
5603 | run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); | ||
5604 | run->mmio.is_write = vcpu->mmio_is_write; | ||
5605 | vcpu->mmio_needed = 1; | ||
5606 | return 0; | ||
5607 | } | ||
5608 | if (vcpu->mmio_is_write) | ||
5609 | return 1; | ||
5610 | vcpu->mmio_read_completed = 1; | ||
5611 | } | ||
5612 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5613 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | ||
5614 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5615 | if (r != EMULATE_DONE) | ||
5616 | return 0; | ||
5617 | return 1; | ||
5618 | } | ||
5619 | |||
5402 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 5620 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
5403 | { | 5621 | { |
5404 | int r; | 5622 | int r; |
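complete_mmio() is the kernel half of splitting an MMIO access wider than 8 bytes into several KVM_EXIT_MMIO round trips: mmio_index tracks how far into mmio_data the exchange has progressed, and each exit moves at most 8 bytes. From userspace nothing changes structurally; a VMM keeps servicing exits and re-entering, as in this hypothetical loop (device_read()/device_write() and vcpu_run are placeholders):

struct kvm_run *run = vcpu_run;		/* mmap()ed vcpu run area */

while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
	if (run->exit_reason != KVM_EXIT_MMIO)
		break;			/* other exit reasons handled elsewhere */
	if (run->mmio.is_write)
		device_write(run->mmio.phys_addr, run->mmio.data, run->mmio.len);
	else
		device_read(run->mmio.phys_addr, run->mmio.data, run->mmio.len);
	/* on re-entry the kernel folds this chunk back in via complete_mmio() */
}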
@@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5425 | } | 5643 | } |
5426 | } | 5644 | } |
5427 | 5645 | ||
5428 | if (vcpu->arch.pio.count || vcpu->mmio_needed) { | 5646 | r = complete_mmio(vcpu); |
5429 | if (vcpu->mmio_needed) { | 5647 | if (r <= 0) |
5430 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 5648 | goto out; |
5431 | vcpu->mmio_read_completed = 1; | 5649 | |
5432 | vcpu->mmio_needed = 0; | ||
5433 | } | ||
5434 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5435 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | ||
5436 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5437 | if (r != EMULATE_DONE) { | ||
5438 | r = 0; | ||
5439 | goto out; | ||
5440 | } | ||
5441 | } | ||
5442 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 5650 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
5443 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 5651 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
5444 | kvm_run->hypercall.ret); | 5652 | kvm_run->hypercall.ret); |
@@ -5455,6 +5663,18 @@ out: | |||
5455 | 5663 | ||
5456 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 5664 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
5457 | { | 5665 | { |
5666 | if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { | ||
5667 | /* | ||
5668 | * We are here if userspace calls get_regs() in the middle of | ||
5669 | * instruction emulation. Registers state needs to be copied | ||
5670 | * back from emulation context to vcpu. Userspace shouldn't do | ||
5671 | * that usually, but some bad designed PV devices (vmware | ||
5672 | * backdoor interface) need this to work | ||
5673 | */ | ||
5674 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
5675 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
5676 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
5677 | } | ||
5458 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 5678 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
5459 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | 5679 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
5460 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 5680 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
5482 | 5702 | ||
5483 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 5703 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
5484 | { | 5704 | { |
5705 | vcpu->arch.emulate_regs_need_sync_from_vcpu = true; | ||
5706 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
5707 | |||
5485 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); | 5708 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
5486 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); | 5709 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
5487 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); | 5710 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
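Taken together, the get_regs()/set_regs() changes make register access in the middle of a half-completed emulated instruction lazy instead of unconditional: KVM_GET_REGS pulls the emulator's copy back only when it is actually newer, and KVM_SET_REGS marks the vcpu copy as authoritative for the next emulation step. A hypothetical VMM-side sequence for the vmware-backdoor style of PIO handling (backdoor_reply is a placeholder):

struct kvm_regs regs;

ioctl(vcpu_fd, KVM_GET_REGS, &regs);	/* syncs from the emulator if needed */
regs.rbx = backdoor_reply;		/* placeholder: patch a register     */
ioctl(vcpu_fd, KVM_SET_REGS, &regs);	/* flags regs for re-sync on resume  */
ioctl(vcpu_fd, KVM_RUN, 0);		/* emulation restarts with new values */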
@@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
5592 | 5815 | ||
5593 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 5816 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
5594 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 5817 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
5595 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 5818 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5596 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5819 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5597 | return EMULATE_DONE; | 5820 | return EMULATE_DONE; |
5598 | } | 5821 | } |
@@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5974 | } | 6197 | } |
5975 | vcpu->arch.pio_data = page_address(page); | 6198 | vcpu->arch.pio_data = page_address(page); |
5976 | 6199 | ||
5977 | if (!kvm->arch.virtual_tsc_khz) | 6200 | kvm_init_tsc_catchup(vcpu, max_tsc_khz); |
5978 | kvm_arch_set_tsc_khz(kvm, max_tsc_khz); | ||
5979 | 6201 | ||
5980 | r = kvm_mmu_create(vcpu); | 6202 | r = kvm_mmu_create(vcpu); |
5981 | if (r < 0) | 6203 | if (r < 0) |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da830ce0..e407ed3df817 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -77,7 +77,7 @@ static inline u32 bit(int bitno) | |||
77 | 77 | ||
78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); | 80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
81 | 81 | ||
82 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); | 82 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); |
83 | 83 | ||
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ea2dc1a2e13d..55ef181521ff 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo { | |||
541 | #define KVM_CAP_PPC_GET_PVINFO 57 | 541 | #define KVM_CAP_PPC_GET_PVINFO 57 |
542 | #define KVM_CAP_PPC_IRQ_LEVEL 58 | 542 | #define KVM_CAP_PPC_IRQ_LEVEL 58 |
543 | #define KVM_CAP_ASYNC_PF 59 | 543 | #define KVM_CAP_ASYNC_PF 59 |
544 | #define KVM_CAP_TSC_CONTROL 60 | ||
545 | #define KVM_CAP_GET_TSC_KHZ 61 | ||
546 | #define KVM_CAP_PPC_BOOKE_SREGS 62 | ||
544 | 547 | ||
545 | #ifdef KVM_CAP_IRQ_ROUTING | 548 | #ifdef KVM_CAP_IRQ_ROUTING |
546 | 549 | ||
@@ -677,6 +680,9 @@ struct kvm_clock_data { | |||
677 | #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) | 680 | #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) |
678 | /* Available with KVM_CAP_PPC_GET_PVINFO */ | 681 | /* Available with KVM_CAP_PPC_GET_PVINFO */ |
679 | #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) | 682 | #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) |
683 | /* Available with KVM_CAP_TSC_CONTROL */ | ||
684 | #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) | ||
685 | #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) | ||
680 | 686 | ||
681 | /* | 687 | /* |
682 | * ioctls for vcpu fds | 688 | * ioctls for vcpu fds |
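The two new ioctl numbers pair with the capability bits added just above. A hypothetical userspace probe-and-set, assuming KVM_CHECK_EXTENSION is issued on the /dev/kvm fd and 2500000 kHz is an arbitrary example value:

int khz;

if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL) > 0)
	ioctl(vcpu_fd, KVM_SET_TSC_KHZ, 2500000);	/* pin guest TSC at 2.5 GHz */

khz = ioctl(vcpu_fd, KVM_GET_TSC_KHZ, 0);
if (khz < 0)
	perror("KVM_GET_TSC_KHZ");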
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..b9c3299c6a55 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -27,6 +27,10 @@ | |||
27 | 27 | ||
28 | #include <asm/kvm_host.h> | 28 | #include <asm/kvm_host.h> |
29 | 29 | ||
30 | #ifndef KVM_MMIO_SIZE | ||
31 | #define KVM_MMIO_SIZE 8 | ||
32 | #endif | ||
33 | |||
30 | /* | 34 | /* |
31 | * vcpu->requests bit members | 35 | * vcpu->requests bit members |
32 | */ | 36 | */ |
@@ -43,7 +47,6 @@ | |||
43 | #define KVM_REQ_DEACTIVATE_FPU 10 | 47 | #define KVM_REQ_DEACTIVATE_FPU 10 |
44 | #define KVM_REQ_EVENT 11 | 48 | #define KVM_REQ_EVENT 11 |
45 | #define KVM_REQ_APF_HALT 12 | 49 | #define KVM_REQ_APF_HALT 12 |
46 | #define KVM_REQ_NMI 13 | ||
47 | 50 | ||
48 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 51 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
49 | 52 | ||
@@ -133,7 +136,8 @@ struct kvm_vcpu { | |||
133 | int mmio_read_completed; | 136 | int mmio_read_completed; |
134 | int mmio_is_write; | 137 | int mmio_is_write; |
135 | int mmio_size; | 138 | int mmio_size; |
136 | unsigned char mmio_data[8]; | 139 | int mmio_index; |
140 | unsigned char mmio_data[KVM_MMIO_SIZE]; | ||
137 | gpa_t mmio_phys_addr; | 141 | gpa_t mmio_phys_addr; |
138 | #endif | 142 | #endif |
139 | 143 | ||
@@ -292,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) | |||
292 | } | 296 | } |
293 | 297 | ||
294 | #define kvm_for_each_vcpu(idx, vcpup, kvm) \ | 298 | #define kvm_for_each_vcpu(idx, vcpup, kvm) \ |
295 | for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ | 299 | for (idx = 0; \ |
296 | idx < atomic_read(&kvm->online_vcpus) && vcpup; \ | 300 | idx < atomic_read(&kvm->online_vcpus) && \ |
297 | vcpup = kvm_get_vcpu(kvm, ++idx)) | 301 | (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ |
302 | idx++) | ||
298 | 303 | ||
299 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); | 304 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); |
300 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | 305 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); |
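The rewritten kvm_for_each_vcpu() checks the index against online_vcpus before it dereferences the vcpu array, where the old form evaluated kvm_get_vcpu() first and only then tested the bound on each step. Usage is unchanged; a typical caller looks like this (illustrative):

struct kvm_vcpu *vcpu;
int i;

kvm_for_each_vcpu(i, vcpu, kvm)
	kvm_make_request(KVM_REQ_EVENT, vcpu);	/* kick every online vcpu */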
@@ -365,7 +370,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | |||
365 | bool *writable); | 370 | bool *writable); |
366 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 371 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
367 | struct kvm_memory_slot *slot, gfn_t gfn); | 372 | struct kvm_memory_slot *slot, gfn_t gfn); |
368 | int memslot_id(struct kvm *kvm, gfn_t gfn); | ||
369 | void kvm_release_pfn_dirty(pfn_t); | 373 | void kvm_release_pfn_dirty(pfn_t); |
370 | void kvm_release_pfn_clean(pfn_t pfn); | 374 | void kvm_release_pfn_clean(pfn_t pfn); |
371 | void kvm_set_pfn_dirty(pfn_t pfn); | 375 | void kvm_set_pfn_dirty(pfn_t pfn); |
@@ -587,8 +591,17 @@ static inline int kvm_deassign_device(struct kvm *kvm, | |||
587 | 591 | ||
588 | static inline void kvm_guest_enter(void) | 592 | static inline void kvm_guest_enter(void) |
589 | { | 593 | { |
594 | BUG_ON(preemptible()); | ||
590 | account_system_vtime(current); | 595 | account_system_vtime(current); |
591 | current->flags |= PF_VCPU; | 596 | current->flags |= PF_VCPU; |
597 | /* KVM does not hold any references to rcu protected data when it | ||
598 | * switches CPU into a guest mode. In fact switching to a guest mode | ||
599 | * is very similar to exiting to userspace from rcu point of view. In | ||
600 | * addition CPU may stay in a guest mode for quite a long time (up to | ||
601 | * one time slice). Lets treat guest mode as quiescent state, just like | ||
602 | * we do with user-mode execution. | ||
603 | */ | ||
604 | rcu_virt_note_context_switch(smp_processor_id()); | ||
592 | } | 605 | } |
593 | 606 | ||
594 | static inline void kvm_guest_exit(void) | 607 | static inline void kvm_guest_exit(void) |
@@ -597,6 +610,11 @@ static inline void kvm_guest_exit(void) | |||
597 | current->flags &= ~PF_VCPU; | 610 | current->flags &= ~PF_VCPU; |
598 | } | 611 | } |
599 | 612 | ||
613 | static inline int memslot_id(struct kvm *kvm, gfn_t gfn) | ||
614 | { | ||
615 | return gfn_to_memslot(kvm, gfn)->id; | ||
616 | } | ||
617 | |||
600 | static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, | 618 | static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, |
601 | gfn_t gfn) | 619 | gfn_t gfn) |
602 | { | 620 | { |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 0b9df8303dcf..8df1ca104a7f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -167,7 +167,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | |||
167 | 167 | ||
168 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " | 168 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " |
169 | "vector=%x trig_mode=%x\n", | 169 | "vector=%x trig_mode=%x\n", |
170 | entry->fields.dest, entry->fields.dest_mode, | 170 | entry->fields.dest_id, entry->fields.dest_mode, |
171 | entry->fields.delivery_mode, entry->fields.vector, | 171 | entry->fields.delivery_mode, entry->fields.vector, |
172 | entry->fields.trig_mode); | 172 | entry->fields.trig_mode); |
173 | 173 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6330653480e4..22cdb960660a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -467,6 +467,7 @@ static struct kvm *kvm_create_vm(void) | |||
467 | if (!kvm->buses[i]) | 467 | if (!kvm->buses[i]) |
468 | goto out_err; | 468 | goto out_err; |
469 | } | 469 | } |
470 | spin_lock_init(&kvm->mmu_lock); | ||
470 | 471 | ||
471 | r = kvm_init_mmu_notifier(kvm); | 472 | r = kvm_init_mmu_notifier(kvm); |
472 | if (r) | 473 | if (r) |
@@ -474,7 +475,6 @@ static struct kvm *kvm_create_vm(void) | |||
474 | 475 | ||
475 | kvm->mm = current->mm; | 476 | kvm->mm = current->mm; |
476 | atomic_inc(&kvm->mm->mm_count); | 477 | atomic_inc(&kvm->mm->mm_count); |
477 | spin_lock_init(&kvm->mmu_lock); | ||
478 | kvm_eventfd_init(kvm); | 478 | kvm_eventfd_init(kvm); |
479 | mutex_init(&kvm->lock); | 479 | mutex_init(&kvm->lock); |
480 | mutex_init(&kvm->irq_lock); | 480 | mutex_init(&kvm->irq_lock); |
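Moving spin_lock_init() above kvm_init_mmu_notifier() matters because the notifier can fire, and try to take mmu_lock, as soon as it is registered, i.e. before kvm_create_vm() would have reached the old initialization site. The rule in miniature:

spin_lock_init(&kvm->mmu_lock);		/* initialize state the callback uses... */
r = kvm_init_mmu_notifier(kvm);		/* ...before the callback can be invoked */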
@@ -648,7 +648,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
648 | goto out; | 648 | goto out; |
649 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) | 649 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) |
650 | goto out; | 650 | goto out; |
651 | if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) | 651 | /* We can read the guest memory with __xxx_user() later on. */ |
652 | if (user_alloc && | ||
653 | ((mem->userspace_addr & (PAGE_SIZE - 1)) || | ||
654 | !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size))) | ||
652 | goto out; | 655 | goto out; |
653 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 656 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
654 | goto out; | 657 | goto out; |
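The extra check pairs with the switch from copy_from_user() to the unchecked __copy_from_user() at the end of this diff: the access_ok() validation now happens once, when the slot is registered, instead of on every guest-memory read. A hypothetical registration from userspace (values are placeholders; userspace_addr must be page aligned and point at accessible memory such as an mmap()ed buffer):

struct kvm_userspace_memory_region mem = {
	.slot            = 0,
	.flags           = 0,
	.guest_phys_addr = 0x100000,			/* placeholder GPA  */
	.memory_size     = ram_size,			/* placeholder size */
	.userspace_addr  = (__u64)(unsigned long)ram,	/* mmap()ed buffer  */
};

if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem) < 0)
	perror("KVM_SET_USER_MEMORY_REGION");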
@@ -996,23 +999,6 @@ out: | |||
996 | return size; | 999 | return size; |
997 | } | 1000 | } |
998 | 1001 | ||
999 | int memslot_id(struct kvm *kvm, gfn_t gfn) | ||
1000 | { | ||
1001 | int i; | ||
1002 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
1003 | struct kvm_memory_slot *memslot = NULL; | ||
1004 | |||
1005 | for (i = 0; i < slots->nmemslots; ++i) { | ||
1006 | memslot = &slots->memslots[i]; | ||
1007 | |||
1008 | if (gfn >= memslot->base_gfn | ||
1009 | && gfn < memslot->base_gfn + memslot->npages) | ||
1010 | break; | ||
1011 | } | ||
1012 | |||
1013 | return memslot - slots->memslots; | ||
1014 | } | ||
1015 | |||
1016 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | 1002 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, |
1017 | gfn_t *nr_pages) | 1003 | gfn_t *nr_pages) |
1018 | { | 1004 | { |
@@ -1300,7 +1286,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
1300 | addr = gfn_to_hva(kvm, gfn); | 1286 | addr = gfn_to_hva(kvm, gfn); |
1301 | if (kvm_is_error_hva(addr)) | 1287 | if (kvm_is_error_hva(addr)) |
1302 | return -EFAULT; | 1288 | return -EFAULT; |
1303 | r = copy_from_user(data, (void __user *)addr + offset, len); | 1289 | r = __copy_from_user(data, (void __user *)addr + offset, len); |
1304 | if (r) | 1290 | if (r) |
1305 | return -EFAULT; | 1291 | return -EFAULT; |
1306 | return 0; | 1292 | return 0; |