about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r-- arch/x86/include/asm/cmpxchg.h 6
-rw-r--r-- arch/x86/include/asm/cpufeature.h 1
-rw-r--r-- arch/x86/include/asm/i387.h 284
-rw-r--r-- arch/x86/include/asm/kvm_emulate.h 16
-rw-r--r-- arch/x86/include/asm/processor.h 1
-rw-r--r-- arch/x86/include/asm/thread_info.h 2
-rw-r--r-- arch/x86/include/asm/uv/uv_hub.h 4
7 files changed, 252 insertions, 62 deletions
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 0c9fa2745f13..b3b733262909 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -145,13 +145,13 @@ extern void __add_wrong_size(void)
145 145
146#ifdef __HAVE_ARCH_CMPXCHG 146#ifdef __HAVE_ARCH_CMPXCHG
147#define cmpxchg(ptr, old, new) \ 147#define cmpxchg(ptr, old, new) \
148 __cmpxchg((ptr), (old), (new), sizeof(*ptr)) 148 __cmpxchg(ptr, old, new, sizeof(*(ptr)))
149 149
150#define sync_cmpxchg(ptr, old, new) \ 150#define sync_cmpxchg(ptr, old, new) \
151 __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) 151 __sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))
152 152
153#define cmpxchg_local(ptr, old, new) \ 153#define cmpxchg_local(ptr, old, new) \
154 __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) 154 __cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
155#endif 155#endif
156 156
157/* 157/*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 17c5d4bdee5e..8d67d428b0f9 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -159,6 +159,7 @@
159#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ 159#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
160#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ 160#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
161#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ 161#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
162#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */
162#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 163#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
163#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 164#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
164#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 165#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6919e936345b..a850b4d8d14d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size;
29extern void fpu_init(void); 29extern void fpu_init(void);
30extern void mxcsr_feature_mask_init(void); 30extern void mxcsr_feature_mask_init(void);
31extern int init_fpu(struct task_struct *child); 31extern int init_fpu(struct task_struct *child);
32extern asmlinkage void math_state_restore(void); 32extern void __math_state_restore(struct task_struct *);
33extern void __math_state_restore(void); 33extern void math_state_restore(void);
34extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); 34extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
35 35
36extern user_regset_active_fn fpregs_active, xfpregs_active; 36extern user_regset_active_fn fpregs_active, xfpregs_active;
@@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu)
212 212
213#endif /* CONFIG_X86_64 */ 213#endif /* CONFIG_X86_64 */
214 214
215/* We need a safe address that is cheap to find and that is already
216 in L1 during context switch. The best choices are unfortunately
217 different for UP and SMP */
218#ifdef CONFIG_SMP
219#define safe_address (__per_cpu_offset[0])
220#else
221#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
222#endif
223
224/* 215/*
225 * These must be called with preempt disabled 216 * These must be called with preempt disabled. Returns
217 * 'true' if the FPU state is still intact.
226 */ 218 */
227static inline void fpu_save_init(struct fpu *fpu) 219static inline int fpu_save_init(struct fpu *fpu)
228{ 220{
229 if (use_xsave()) { 221 if (use_xsave()) {
230 fpu_xsave(fpu); 222 fpu_xsave(fpu);
@@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
233 * xsave header may indicate the init state of the FP. 225 * xsave header may indicate the init state of the FP.
234 */ 226 */
235 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) 227 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
236 return; 228 return 1;
237 } else if (use_fxsr()) { 229 } else if (use_fxsr()) {
238 fpu_fxsave(fpu); 230 fpu_fxsave(fpu);
239 } else { 231 } else {
240 asm volatile("fnsave %[fx]; fwait" 232 asm volatile("fnsave %[fx]; fwait"
241 : [fx] "=m" (fpu->state->fsave)); 233 : [fx] "=m" (fpu->state->fsave));
242 return; 234 return 0;
243 } 235 }
244 236
245 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) 237 /*
238 * If exceptions are pending, we need to clear them so
239 * that we don't randomly get exceptions later.
240 *
241 * FIXME! Is this perhaps only true for the old-style
242 * irq13 case? Maybe we could leave the x87 state
243 * intact otherwise?
244 */
245 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
246 asm volatile("fnclex"); 246 asm volatile("fnclex");
247 247 return 0;
248 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 248 }
249 is pending. Clear the x87 state here by setting it to fixed 249 return 1;
250 values. safe_address is a random variable that should be in L1 */
251 alternative_input(
252 ASM_NOP8 ASM_NOP2,
253 "emms\n\t" /* clear stack tags */
254 "fildl %P[addr]", /* set F?P to defined value */
255 X86_FEATURE_FXSAVE_LEAK,
256 [addr] "m" (safe_address));
257} 250}
258 251
259static inline void __save_init_fpu(struct task_struct *tsk) 252static inline int __save_init_fpu(struct task_struct *tsk)
260{ 253{
261 fpu_save_init(&tsk->thread.fpu); 254 return fpu_save_init(&tsk->thread.fpu);
262 task_thread_info(tsk)->status &= ~TS_USEDFPU;
263} 255}
264 256
265static inline int fpu_fxrstor_checking(struct fpu *fpu) 257static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
281} 273}
282 274
283/* 275/*
284 * Signal frame handlers... 276 * Software FPU state helpers. Careful: these need to
277 * be preemption protection *and* they need to be
278 * properly paired with the CR0.TS changes!
285 */ 279 */
286extern int save_i387_xstate(void __user *buf); 280static inline int __thread_has_fpu(struct task_struct *tsk)
287extern int restore_i387_xstate(void __user *buf); 281{
282 return tsk->thread.has_fpu;
283}
288 284
289static inline void __unlazy_fpu(struct task_struct *tsk) 285/* Must be paired with an 'stts' after! */
286static inline void __thread_clear_has_fpu(struct task_struct *tsk)
290{ 287{
291 if (task_thread_info(tsk)->status & TS_USEDFPU) { 288 tsk->thread.has_fpu = 0;
292 __save_init_fpu(tsk); 289}
293 stts(); 290
294 } else 291/* Must be paired with a 'clts' before! */
295 tsk->fpu_counter = 0; 292static inline void __thread_set_has_fpu(struct task_struct *tsk)
293{
294 tsk->thread.has_fpu = 1;
296} 295}
297 296
297/*
298 * Encapsulate the CR0.TS handling together with the
299 * software flag.
300 *
301 * These generally need preemption protection to work,
302 * do try to avoid using these on their own.
303 */
304static inline void __thread_fpu_end(struct task_struct *tsk)
305{
306 __thread_clear_has_fpu(tsk);
307 stts();
308}
309
310static inline void __thread_fpu_begin(struct task_struct *tsk)
311{
312 clts();
313 __thread_set_has_fpu(tsk);
314}
315
316/*
317 * FPU state switching for scheduling.
318 *
319 * This is a two-stage process:
320 *
321 * - switch_fpu_prepare() saves the old state and
322 * sets the new state of the CR0.TS bit. This is
323 * done within the context of the old process.
324 *
325 * - switch_fpu_finish() restores the new state as
326 * necessary.
327 */
328typedef struct { int preload; } fpu_switch_t;
329
330/*
331 * FIXME! We could do a totally lazy restore, but we need to
332 * add a per-cpu "this was the task that last touched the FPU
333 * on this CPU" variable, and the task needs to have a "I last
334 * touched the FPU on this CPU" and check them.
335 *
336 * We don't do that yet, so "fpu_lazy_restore()" always returns
337 * false, but some day..
338 */
339#define fpu_lazy_restore(tsk) (0)
340#define fpu_lazy_state_intact(tsk) do { } while (0)
341
342static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
343{
344 fpu_switch_t fpu;
345
346 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
347 if (__thread_has_fpu(old)) {
348 if (__save_init_fpu(old))
349 fpu_lazy_state_intact(old);
350 __thread_clear_has_fpu(old);
351 old->fpu_counter++;
352
353 /* Don't change CR0.TS if we just switch! */
354 if (fpu.preload) {
355 __thread_set_has_fpu(new);
356 prefetch(new->thread.fpu.state);
357 } else
358 stts();
359 } else {
360 old->fpu_counter = 0;
361 if (fpu.preload) {
362 if (fpu_lazy_restore(new))
363 fpu.preload = 0;
364 else
365 prefetch(new->thread.fpu.state);
366 __thread_fpu_begin(new);
367 }
368 }
369 return fpu;
370}
371
372/*
373 * By the time this gets called, we've already cleared CR0.TS and
374 * given the process the FPU if we are going to preload the FPU
375 * state - all we need to do is to conditionally restore the register
376 * state itself.
377 */
378static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
379{
380 if (fpu.preload)
381 __math_state_restore(new);
382}
383
384/*
385 * Signal frame handlers...
386 */
387extern int save_i387_xstate(void __user *buf);
388extern int restore_i387_xstate(void __user *buf);
389
298static inline void __clear_fpu(struct task_struct *tsk) 390static inline void __clear_fpu(struct task_struct *tsk)
299{ 391{
300 if (task_thread_info(tsk)->status & TS_USEDFPU) { 392 if (__thread_has_fpu(tsk)) {
301 /* Ignore delayed exceptions from user space */ 393 /* Ignore delayed exceptions from user space */
302 asm volatile("1: fwait\n" 394 asm volatile("1: fwait\n"
303 "2:\n" 395 "2:\n"
304 _ASM_EXTABLE(1b, 2b)); 396 _ASM_EXTABLE(1b, 2b));
305 task_thread_info(tsk)->status &= ~TS_USEDFPU; 397 __thread_fpu_end(tsk);
306 stts();
307 } 398 }
308} 399}
309 400
401/*
402 * Were we in an interrupt that interrupted kernel mode?
403 *
404 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
405 * pair does nothing at all: the thread must not have fpu (so
406 * that we don't try to save the FPU state), and TS must
407 * be set (so that the clts/stts pair does nothing that is
408 * visible in the interrupted kernel thread).
409 */
410static inline bool interrupted_kernel_fpu_idle(void)
411{
412 return !__thread_has_fpu(current) &&
413 (read_cr0() & X86_CR0_TS);
414}
415
416/*
417 * Were we in user mode (or vm86 mode) when we were
418 * interrupted?
419 *
420 * Doing kernel_fpu_begin/end() is ok if we are running
421 * in an interrupt context from user mode - we'll just
422 * save the FPU state as required.
423 */
424static inline bool interrupted_user_mode(void)
425{
426 struct pt_regs *regs = get_irq_regs();
427 return regs && user_mode_vm(regs);
428}
429
430/*
431 * Can we use the FPU in kernel mode with the
432 * whole "kernel_fpu_begin/end()" sequence?
433 *
434 * It's always ok in process context (ie "not interrupt")
435 * but it is sometimes ok even from an irq.
436 */
437static inline bool irq_fpu_usable(void)
438{
439 return !in_interrupt() ||
440 interrupted_user_mode() ||
441 interrupted_kernel_fpu_idle();
442}
443
310static inline void kernel_fpu_begin(void) 444static inline void kernel_fpu_begin(void)
311{ 445{
312 struct thread_info *me = current_thread_info(); 446 struct task_struct *me = current;
447
448 WARN_ON_ONCE(!irq_fpu_usable());
313 preempt_disable(); 449 preempt_disable();
314 if (me->status & TS_USEDFPU) 450 if (__thread_has_fpu(me)) {
315 __save_init_fpu(me->task); 451 __save_init_fpu(me);
316 else 452 __thread_clear_has_fpu(me);
453 /* We do 'stts()' in kernel_fpu_end() */
454 } else
317 clts(); 455 clts();
318} 456}
319 457
@@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void)
323 preempt_enable(); 461 preempt_enable();
324} 462}
325 463
326static inline bool irq_fpu_usable(void)
327{
328 struct pt_regs *regs;
329
330 return !in_interrupt() || !(regs = get_irq_regs()) || \
331 user_mode(regs) || (read_cr0() & X86_CR0_TS);
332}
333
334/* 464/*
335 * Some instructions like VIA's padlock instructions generate a spurious 465 * Some instructions like VIA's padlock instructions generate a spurious
336 * DNA fault but don't modify SSE registers. And these instructions 466 * DNA fault but don't modify SSE registers. And these instructions
@@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state)
363} 493}
364 494
365/* 495/*
496 * The question "does this thread have fpu access?"
497 * is slightly racy, since preemption could come in
498 * and revoke it immediately after the test.
499 *
500 * However, even in that very unlikely scenario,
501 * we can just assume we have FPU access - typically
502 * to save the FP state - we'll just take a #NM
503 * fault and get the FPU access back.
504 *
505 * The actual user_fpu_begin/end() functions
506 * need to be preemption-safe, though.
507 *
508 * NOTE! user_fpu_end() must be used only after you
509 * have saved the FP state, and user_fpu_begin() must
510 * be used only immediately before restoring it.
511 * These functions do not do any save/restore on
512 * their own.
513 */
514static inline int user_has_fpu(void)
515{
516 return __thread_has_fpu(current);
517}
518
519static inline void user_fpu_end(void)
520{
521 preempt_disable();
522 __thread_fpu_end(current);
523 preempt_enable();
524}
525
526static inline void user_fpu_begin(void)
527{
528 preempt_disable();
529 if (!user_has_fpu())
530 __thread_fpu_begin(current);
531 preempt_enable();
532}
533
534/*
366 * These disable preemption on their own and are safe 535 * These disable preemption on their own and are safe
367 */ 536 */
368static inline void save_init_fpu(struct task_struct *tsk) 537static inline void save_init_fpu(struct task_struct *tsk)
369{ 538{
539 WARN_ON_ONCE(!__thread_has_fpu(tsk));
370 preempt_disable(); 540 preempt_disable();
371 __save_init_fpu(tsk); 541 __save_init_fpu(tsk);
372 stts(); 542 __thread_fpu_end(tsk);
373 preempt_enable(); 543 preempt_enable();
374} 544}
375 545
376static inline void unlazy_fpu(struct task_struct *tsk) 546static inline void unlazy_fpu(struct task_struct *tsk)
377{ 547{
378 preempt_disable(); 548 preempt_disable();
379 __unlazy_fpu(tsk); 549 if (__thread_has_fpu(tsk)) {
550 __save_init_fpu(tsk);
551 __thread_fpu_end(tsk);
552 } else
553 tsk->fpu_counter = 0;
380 preempt_enable(); 554 preempt_enable();
381} 555}
382 556
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index ab4092e3214e..7b9cfc4878af 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -190,6 +190,9 @@ struct x86_emulate_ops {
190 int (*intercept)(struct x86_emulate_ctxt *ctxt, 190 int (*intercept)(struct x86_emulate_ctxt *ctxt,
191 struct x86_instruction_info *info, 191 struct x86_instruction_info *info,
192 enum x86_intercept_stage stage); 192 enum x86_intercept_stage stage);
193
194 bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
195 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
193}; 196};
194 197
195typedef u32 __attribute__((vector_size(16))) sse128_t; 198typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -298,6 +301,19 @@ struct x86_emulate_ctxt {
298#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ 301#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
299 X86EMUL_MODE_PROT64) 302 X86EMUL_MODE_PROT64)
300 303
304/* CPUID vendors */
305#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
306#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
307#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
308
309#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
310#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
311#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
312
313#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
314#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
315#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
316
301enum x86_intercept_stage { 317enum x86_intercept_stage {
302 X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ 318 X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
303 X86_ICPT_PRE_EXCEPT, 319 X86_ICPT_PRE_EXCEPT,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa9088c26931..f7c89e231c6c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -454,6 +454,7 @@ struct thread_struct {
454 unsigned long trap_no; 454 unsigned long trap_no;
455 unsigned long error_code; 455 unsigned long error_code;
456 /* floating point and extended processor state */ 456 /* floating point and extended processor state */
457 unsigned long has_fpu;
457 struct fpu fpu; 458 struct fpu fpu;
458#ifdef CONFIG_X86_32 459#ifdef CONFIG_X86_32
459 /* Virtual 86 mode info */ 460 /* Virtual 86 mode info */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index bc817cd8b443..cfd8144d5527 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -247,8 +247,6 @@ static inline struct thread_info *current_thread_info(void)
247 * ever touches our thread-synchronous status, so we don't 247 * ever touches our thread-synchronous status, so we don't
248 * have to worry about atomic accesses. 248 * have to worry about atomic accesses.
249 */ 249 */
250#define TS_USEDFPU 0x0001 /* FPU was used by this task
251 this quantum (SMP) */
252#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ 250#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
253#define TS_POLLING 0x0004 /* idle task polling need_resched, 251#define TS_POLLING 0x0004 /* idle task polling need_resched,
254 skip sending interrupt */ 252 skip sending interrupt */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 54a13aaebc40..21f7385badb8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -318,13 +318,13 @@ uv_gpa_in_mmr_space(unsigned long gpa)
318/* UV global physical address --> socket phys RAM */ 318/* UV global physical address --> socket phys RAM */
319static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) 319static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
320{ 320{
321 unsigned long paddr = gpa & uv_hub_info->gpa_mask; 321 unsigned long paddr;
322 unsigned long remap_base = uv_hub_info->lowmem_remap_base; 322 unsigned long remap_base = uv_hub_info->lowmem_remap_base;
323 unsigned long remap_top = uv_hub_info->lowmem_remap_top; 323 unsigned long remap_top = uv_hub_info->lowmem_remap_top;
324 324
325 gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | 325 gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
326 ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); 326 ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
327 gpa = gpa & uv_hub_info->gpa_mask; 327 paddr = gpa & uv_hub_info->gpa_mask;
328 if (paddr >= remap_base && paddr < remap_base + remap_top) 328 if (paddr >= remap_base && paddr < remap_base + remap_top)
329 paddr -= remap_base; 329 paddr -= remap_base;
330 return paddr; 330 return paddr;