aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author: Arnd Bergmann <arnd@arndb.de> 2012-10-04 16:57:00 -0400
committer: Arnd Bergmann <arnd@arndb.de> 2012-10-04 16:57:51 -0400
commit: c37d6154c0b9163c27e53cc1d0be3867b4abd760 (patch)
tree: 7a24522c56d1cb284dff1d3c225bbdaba0901bb5 /arch/x86/kernel
parent: e7a570ff7dff9af6e54ff5e580a61ec7652137a0 (diff)
parent: 8a1ab3155c2ac7fbe5f2038d6e26efeb607a1498 (diff)
Merge branch 'disintegrate-asm-generic' of git://git.infradead.org/users/dhowells/linux-headers into asm-generic
Patches from David Howells <dhowells@redhat.com>:

This is to complete part of the UAPI disintegration for which the preparatory patches were pulled recently.

Note that there are some fixup patches which are at the base of the branch aimed at you, plus all arches get the asm-generic branch merged in too.

* 'disintegrate-asm-generic' of git://git.infradead.org/users/dhowells/linux-headers:
  UAPI: (Scripted) Disintegrate include/asm-generic
  UAPI: Fix conditional header installation handling (notably kvm_para.h on m68k)
  c6x: remove c6x signal.h
  UAPI: Split compound conditionals containing __KERNEL__ in Arm64
  UAPI: Fix the guards on various asm/unistd.h files

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/Makefile 2
-rw-r--r-- arch/x86/kernel/acpi/boot.c 2
-rw-r--r-- arch/x86/kernel/acpi/sleep.c 15
-rw-r--r-- arch/x86/kernel/alternative.c 111
-rw-r--r-- arch/x86/kernel/apic/apic.c 2
-rw-r--r-- arch/x86/kernel/cpu/amd.c 67
-rw-r--r-- arch/x86/kernel/cpu/bugs.c 7
-rw-r--r-- arch/x86/kernel/cpu/common.c 63
-rw-r--r-- arch/x86/kernel/cpu/intel.c 4
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-inject.c 8
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-internal.h 12
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 94
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_intel.c 168
-rw-r--r-- arch/x86/kernel/cpu/mkcapflags.pl 5
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c 30
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.h 6
-rw-r--r-- arch/x86/kernel/cpu/proc.c 5
-rw-r--r-- arch/x86/kernel/cpuid.c 5
-rw-r--r-- arch/x86/kernel/devicetree.c 51
-rw-r--r-- arch/x86/kernel/entry_32.S 100
-rw-r--r-- arch/x86/kernel/entry_64.S 172
-rw-r--r-- arch/x86/kernel/ftrace.c 73
-rw-r--r-- arch/x86/kernel/head_32.S 31
-rw-r--r-- arch/x86/kernel/i387.c 292
-rw-r--r-- arch/x86/kernel/i8259.c 2
-rw-r--r-- arch/x86/kernel/irq.c 4
-rw-r--r-- arch/x86/kernel/kprobes.c 67
-rw-r--r-- arch/x86/kernel/microcode_amd.c 357
-rw-r--r-- arch/x86/kernel/microcode_core.c 67
-rw-r--r-- arch/x86/kernel/microcode_intel.c 3
-rw-r--r-- arch/x86/kernel/msr.c 5
-rw-r--r-- arch/x86/kernel/perf_regs.c 105
-rw-r--r-- arch/x86/kernel/probe_roms.c 2
-rw-r--r-- arch/x86/kernel/process.c 22
-rw-r--r-- arch/x86/kernel/process_32.c 4
-rw-r--r-- arch/x86/kernel/process_64.c 4
-rw-r--r-- arch/x86/kernel/ptrace.c 8
-rw-r--r-- arch/x86/kernel/setup.c 4
-rw-r--r-- arch/x86/kernel/signal.c 231
-rw-r--r-- arch/x86/kernel/smpboot.c 20
-rw-r--r-- arch/x86/kernel/step.c 53
-rw-r--r-- arch/x86/kernel/traps.c 174
-rw-r--r-- arch/x86/kernel/uprobes.c 52
-rw-r--r-- arch/x86/kernel/x8664_ksyms_64.c 6
-rw-r--r-- arch/x86/kernel/x86_init.c 4
-rw-r--r-- arch/x86/kernel/xsave.c 517
46 files changed, 1908 insertions, 1128 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
100obj-$(CONFIG_OF) += devicetree.o 100obj-$(CONFIG_OF) += devicetree.o
101obj-$(CONFIG_UPROBES) += uprobes.o 101obj-$(CONFIG_UPROBES) += uprobes.o
102 102
103obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
104
103### 105###
104# 64 bit specific files 106# 64 bit specific files
105ifeq ($(CONFIG_X86_64),y) 107ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..e651f7a589ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
656 acpi_register_lapic(physid, ACPI_MADT_ENABLED); 656 acpi_register_lapic(physid, ACPI_MADT_ENABLED);
657 657
658 /* 658 /*
659 * If mp_register_lapic successfully generates a new logical cpu 659 * If acpi_register_lapic successfully generates a new logical cpu
660 * number, then the following will get us exactly what was mapped 660 * number, then the following will get us exactly what was mapped
661 */ 661 */
662 cpumask_andnot(new_map, cpu_present_mask, tmp_map); 662 cpumask_andnot(new_map, cpu_present_mask, tmp_map);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1b8e5a03d942..11676cf65aee 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void)
43 43
44 header->video_mode = saved_video_mode; 44 header->video_mode = saved_video_mode;
45 45
46 header->pmode_behavior = 0;
47
46#ifndef CONFIG_64BIT 48#ifndef CONFIG_64BIT
47 store_gdt((struct desc_ptr *)&header->pmode_gdt); 49 store_gdt((struct desc_ptr *)&header->pmode_gdt);
48 50
49 if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, 51 if (!rdmsr_safe(MSR_EFER,
50 &header->pmode_efer_high)) 52 &header->pmode_efer_low,
51 header->pmode_efer_low = header->pmode_efer_high = 0; 53 &header->pmode_efer_high))
54 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
52#endif /* !CONFIG_64BIT */ 55#endif /* !CONFIG_64BIT */
53 56
54 header->pmode_cr0 = read_cr0(); 57 header->pmode_cr0 = read_cr0();
55 header->pmode_cr4 = read_cr4_safe(); 58 if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
56 header->pmode_behavior = 0; 59 header->pmode_cr4 = read_cr4();
60 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
61 }
57 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, 62 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
58 &header->pmode_misc_en_low, 63 &header->pmode_misc_en_low,
59 &header->pmode_misc_en_high)) 64 &header->pmode_misc_en_high))
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534baed5..ef5ccca79a6c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@
23 23
24#define MAX_PATCH_LEN (255-1) 24#define MAX_PATCH_LEN (255-1)
25 25
26#ifdef CONFIG_HOTPLUG_CPU
27static int smp_alt_once;
28
29static int __init bootonly(char *str)
30{
31 smp_alt_once = 1;
32 return 1;
33}
34__setup("smp-alt-boot", bootonly);
35#else
36#define smp_alt_once 1
37#endif
38
39static int __initdata_or_module debug_alternative; 26static int __initdata_or_module debug_alternative;
40 27
41static int __init debug_alt(char *str) 28static int __init debug_alt(char *str)
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
317 /* turn DS segment override prefix into lock prefix */ 304 /* turn DS segment override prefix into lock prefix */
318 if (*ptr == 0x3e) 305 if (*ptr == 0x3e)
319 text_poke(ptr, ((unsigned char []){0xf0}), 1); 306 text_poke(ptr, ((unsigned char []){0xf0}), 1);
320 }; 307 }
321 mutex_unlock(&text_mutex); 308 mutex_unlock(&text_mutex);
322} 309}
323 310
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
326{ 313{
327 const s32 *poff; 314 const s32 *poff;
328 315
329 if (noreplace_smp)
330 return;
331
332 mutex_lock(&text_mutex); 316 mutex_lock(&text_mutex);
333 for (poff = start; poff < end; poff++) { 317 for (poff = start; poff < end; poff++) {
334 u8 *ptr = (u8 *)poff + *poff; 318 u8 *ptr = (u8 *)poff + *poff;
@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
338 /* turn lock prefix into DS segment override prefix */ 322 /* turn lock prefix into DS segment override prefix */
339 if (*ptr == 0xf0) 323 if (*ptr == 0xf0)
340 text_poke(ptr, ((unsigned char []){0x3E}), 1); 324 text_poke(ptr, ((unsigned char []){0x3E}), 1);
341 }; 325 }
342 mutex_unlock(&text_mutex); 326 mutex_unlock(&text_mutex);
343} 327}
344 328
@@ -359,7 +343,7 @@ struct smp_alt_module {
359}; 343};
360static LIST_HEAD(smp_alt_modules); 344static LIST_HEAD(smp_alt_modules);
361static DEFINE_MUTEX(smp_alt); 345static DEFINE_MUTEX(smp_alt);
362static int smp_mode = 1; /* protected by smp_alt */ 346static bool uniproc_patched = false; /* protected by smp_alt */
363 347
364void __init_or_module alternatives_smp_module_add(struct module *mod, 348void __init_or_module alternatives_smp_module_add(struct module *mod,
365 char *name, 349 char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
368{ 352{
369 struct smp_alt_module *smp; 353 struct smp_alt_module *smp;
370 354
371 if (noreplace_smp) 355 mutex_lock(&smp_alt);
372 return; 356 if (!uniproc_patched)
357 goto unlock;
373 358
374 if (smp_alt_once) { 359 if (num_possible_cpus() == 1)
375 if (boot_cpu_has(X86_FEATURE_UP)) 360 /* Don't bother remembering, we'll never have to undo it. */
376 alternatives_smp_unlock(locks, locks_end, 361 goto smp_unlock;
377 text, text_end);
378 return;
379 }
380 362
381 smp = kzalloc(sizeof(*smp), GFP_KERNEL); 363 smp = kzalloc(sizeof(*smp), GFP_KERNEL);
382 if (NULL == smp) 364 if (NULL == smp)
383 return; /* we'll run the (safe but slow) SMP code then ... */ 365 /* we'll run the (safe but slow) SMP code then ... */
366 goto unlock;
384 367
385 smp->mod = mod; 368 smp->mod = mod;
386 smp->name = name; 369 smp->name = name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
392 __func__, smp->locks, smp->locks_end, 375 __func__, smp->locks, smp->locks_end,
393 smp->text, smp->text_end, smp->name); 376 smp->text, smp->text_end, smp->name);
394 377
395 mutex_lock(&smp_alt);
396 list_add_tail(&smp->next, &smp_alt_modules); 378 list_add_tail(&smp->next, &smp_alt_modules);
397 if (boot_cpu_has(X86_FEATURE_UP)) 379smp_unlock:
398 alternatives_smp_unlock(smp->locks, smp->locks_end, 380 alternatives_smp_unlock(locks, locks_end, text, text_end);
399 smp->text, smp->text_end); 381unlock:
400 mutex_unlock(&smp_alt); 382 mutex_unlock(&smp_alt);
401} 383}
402 384
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
404{ 386{
405 struct smp_alt_module *item; 387 struct smp_alt_module *item;
406 388
407 if (smp_alt_once || noreplace_smp)
408 return;
409
410 mutex_lock(&smp_alt); 389 mutex_lock(&smp_alt);
411 list_for_each_entry(item, &smp_alt_modules, next) { 390 list_for_each_entry(item, &smp_alt_modules, next) {
412 if (mod != item->mod) 391 if (mod != item->mod)
413 continue; 392 continue;
414 list_del(&item->next); 393 list_del(&item->next);
415 mutex_unlock(&smp_alt);
416 DPRINTK("%s: %s\n", __func__, item->name);
417 kfree(item); 394 kfree(item);
418 return; 395 break;
419 } 396 }
420 mutex_unlock(&smp_alt); 397 mutex_unlock(&smp_alt);
421} 398}
422 399
423bool skip_smp_alternatives; 400void alternatives_enable_smp(void)
424void alternatives_smp_switch(int smp)
425{ 401{
426 struct smp_alt_module *mod; 402 struct smp_alt_module *mod;
427 403
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
436 pr_info("lockdep: fixing up alternatives\n"); 412 pr_info("lockdep: fixing up alternatives\n");
437#endif 413#endif
438 414
439 if (noreplace_smp || smp_alt_once || skip_smp_alternatives) 415 /* Why bother if there are no other CPUs? */
440 return; 416 BUG_ON(num_possible_cpus() == 1);
441 BUG_ON(!smp && (num_online_cpus() > 1));
442 417
443 mutex_lock(&smp_alt); 418 mutex_lock(&smp_alt);
444 419
445 /* 420 if (uniproc_patched) {
446 * Avoid unnecessary switches because it forces JIT based VMs to
447 * throw away all cached translations, which can be quite costly.
448 */
449 if (smp == smp_mode) {
450 /* nothing */
451 } else if (smp) {
452 pr_info("switching to SMP code\n"); 421 pr_info("switching to SMP code\n");
422 BUG_ON(num_online_cpus() != 1);
453 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); 423 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
454 clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); 424 clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
455 list_for_each_entry(mod, &smp_alt_modules, next) 425 list_for_each_entry(mod, &smp_alt_modules, next)
456 alternatives_smp_lock(mod->locks, mod->locks_end, 426 alternatives_smp_lock(mod->locks, mod->locks_end,
457 mod->text, mod->text_end); 427 mod->text, mod->text_end);
458 } else { 428 uniproc_patched = false;
459 pr_info("switching to UP code\n");
460 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
461 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
462 list_for_each_entry(mod, &smp_alt_modules, next)
463 alternatives_smp_unlock(mod->locks, mod->locks_end,
464 mod->text, mod->text_end);
465 } 429 }
466 smp_mode = smp;
467 mutex_unlock(&smp_alt); 430 mutex_unlock(&smp_alt);
468} 431}
469 432
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
540 503
541 apply_alternatives(__alt_instructions, __alt_instructions_end); 504 apply_alternatives(__alt_instructions, __alt_instructions_end);
542 505
543 /* switch to patch-once-at-boottime-only mode and free the
544 * tables in case we know the number of CPUs will never ever
545 * change */
546#ifdef CONFIG_HOTPLUG_CPU
547 if (num_possible_cpus() < 2)
548 smp_alt_once = 1;
549#endif
550
551#ifdef CONFIG_SMP 506#ifdef CONFIG_SMP
552 if (smp_alt_once) { 507 /* Patch to UP if other cpus not imminent. */
553 if (1 == num_possible_cpus()) { 508 if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
554 pr_info("switching to UP code\n"); 509 uniproc_patched = true;
555 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
556 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
557
558 alternatives_smp_unlock(__smp_locks, __smp_locks_end,
559 _text, _etext);
560 }
561 } else {
562 alternatives_smp_module_add(NULL, "core kernel", 510 alternatives_smp_module_add(NULL, "core kernel",
563 __smp_locks, __smp_locks_end, 511 __smp_locks, __smp_locks_end,
564 _text, _etext); 512 _text, _etext);
565
566 /* Only switch to UP mode if we don't immediately boot others */
567 if (num_present_cpus() == 1 || setup_max_cpus <= 1)
568 alternatives_smp_switch(0);
569 } 513 }
570#endif
571 apply_paravirt(__parainstructions, __parainstructions_end);
572 514
573 if (smp_alt_once) 515 if (!uniproc_patched || num_possible_cpus() == 1)
574 free_init_pages("SMP alternatives", 516 free_init_pages("SMP alternatives",
575 (unsigned long)__smp_locks, 517 (unsigned long)__smp_locks,
576 (unsigned long)__smp_locks_end); 518 (unsigned long)__smp_locks_end);
519#endif
520
521 apply_paravirt(__parainstructions, __parainstructions_end);
577 522
578 restart_nmi(); 523 restart_nmi();
579} 524}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb3082328..b17416e72fbd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
1934 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); 1934 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
1935 i++; 1935 i++;
1936 v1 >>= 1; 1936 v1 >>= 1;
1937 }; 1937 }
1938 1938
1939 apic_printk(APIC_DEBUG, KERN_CONT "\n"); 1939 apic_printk(APIC_DEBUG, KERN_CONT "\n");
1940 1940
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19039f0..f7e98a2c0d12 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
737} 737}
738#endif 738#endif
739 739
740static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
741{
742 if (!cpu_has_invlpg)
743 return;
744
745 tlb_flushall_shift = 5;
746
747 if (c->x86 <= 0x11)
748 tlb_flushall_shift = 4;
749}
750
751static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
752{
753 u32 ebx, eax, ecx, edx;
754 u16 mask = 0xfff;
755
756 if (c->x86 < 0xf)
757 return;
758
759 if (c->extended_cpuid_level < 0x80000006)
760 return;
761
762 cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
763
764 tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
765 tlb_lli_4k[ENTRIES] = ebx & mask;
766
767 /*
768 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
769 * characteristics from the CPUID function 0x80000005 instead.
770 */
771 if (c->x86 == 0xf) {
772 cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
773 mask = 0xff;
774 }
775
776 /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
777 if (!((eax >> 16) & mask)) {
778 u32 a, b, c, d;
779
780 cpuid(0x80000005, &a, &b, &c, &d);
781 tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
782 } else {
783 tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
784 }
785
786 /* a 4M entry uses two 2M entries */
787 tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
788
789 /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
790 if (!(eax & mask)) {
791 /* Erratum 658 */
792 if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
793 tlb_lli_2m[ENTRIES] = 1024;
794 } else {
795 cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
796 tlb_lli_2m[ENTRIES] = eax & 0xff;
797 }
798 } else
799 tlb_lli_2m[ENTRIES] = eax & mask;
800
801 tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
802
803 cpu_set_tlb_flushall_shift(c);
804}
805
740static const struct cpu_dev __cpuinitconst amd_cpu_dev = { 806static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
741 .c_vendor = "AMD", 807 .c_vendor = "AMD",
742 .c_ident = { "AuthenticAMD" }, 808 .c_ident = { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
756 .c_size_cache = amd_size_cache, 822 .c_size_cache = amd_size_cache,
757#endif 823#endif
758 .c_early_init = early_init_amd, 824 .c_early_init = early_init_amd,
825 .c_detect_tlb = cpu_detect_tlb_amd,
759 .c_bsp_init = bsp_init_amd, 826 .c_bsp_init = bsp_init_amd,
760 .c_init = init_amd, 827 .c_init = init_amd,
761 .c_x86_vendor = X86_VENDOR_AMD, 828 .c_x86_vendor = X86_VENDOR_AMD,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
165 print_cpu_info(&boot_cpu_data); 165 print_cpu_info(&boot_cpu_data);
166#endif 166#endif
167 check_config(); 167 check_config();
168 check_fpu();
169 check_hlt(); 168 check_hlt();
170 check_popad(); 169 check_popad();
171 init_utsname()->machine[1] = 170 init_utsname()->machine[1] =
172 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 171 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
173 alternative_instructions(); 172 alternative_instructions();
173
174 /*
175 * kernel_fpu_begin/end() in check_fpu() relies on the patched
176 * alternative instructions.
177 */
178 check_fpu();
174} 179}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..7505f7b13e71 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
259} 259}
260#endif 260#endif
261 261
262static int disable_smep __cpuinitdata;
263static __init int setup_disable_smep(char *arg) 262static __init int setup_disable_smep(char *arg)
264{ 263{
265 disable_smep = 1; 264 setup_clear_cpu_cap(X86_FEATURE_SMEP);
266 return 1; 265 return 1;
267} 266}
268__setup("nosmep", setup_disable_smep); 267__setup("nosmep", setup_disable_smep);
269 268
270static __cpuinit void setup_smep(struct cpuinfo_x86 *c) 269static __always_inline void setup_smep(struct cpuinfo_x86 *c)
271{ 270{
272 if (cpu_has(c, X86_FEATURE_SMEP)) { 271 if (cpu_has(c, X86_FEATURE_SMEP))
273 if (unlikely(disable_smep)) { 272 set_in_cr4(X86_CR4_SMEP);
274 setup_clear_cpu_cap(X86_FEATURE_SMEP); 273}
275 clear_in_cr4(X86_CR4_SMEP); 274
276 } else 275static __init int setup_disable_smap(char *arg)
277 set_in_cr4(X86_CR4_SMEP); 276{
278 } 277 setup_clear_cpu_cap(X86_FEATURE_SMAP);
278 return 1;
279}
280__setup("nosmap", setup_disable_smap);
281
282static __always_inline void setup_smap(struct cpuinfo_x86 *c)
283{
284 unsigned long eflags;
285
286 /* This should have been cleared long ago */
287 raw_local_save_flags(eflags);
288 BUG_ON(eflags & X86_EFLAGS_AC);
289
290 if (cpu_has(c, X86_FEATURE_SMAP))
291 set_in_cr4(X86_CR4_SMAP);
279} 292}
280 293
281/* 294/*
@@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
476 489
477 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ 490 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
478 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ 491 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
479 "tlb_flushall_shift is 0x%x\n", 492 "tlb_flushall_shift: %d\n",
480 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], 493 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
481 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], 494 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
482 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], 495 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
712 c->cpu_index = 0; 725 c->cpu_index = 0;
713 filter_cpuid_features(c, false); 726 filter_cpuid_features(c, false);
714 727
715 setup_smep(c);
716
717 if (this_cpu->c_bsp_init) 728 if (this_cpu->c_bsp_init)
718 this_cpu->c_bsp_init(c); 729 this_cpu->c_bsp_init(c);
719} 730}
@@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
798 c->phys_proc_id = c->initial_apicid; 809 c->phys_proc_id = c->initial_apicid;
799 } 810 }
800 811
801 setup_smep(c);
802
803 get_model_name(c); /* Default name */ 812 get_model_name(c); /* Default name */
804 813
805 detect_nopl(c); 814 detect_nopl(c);
@@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
864 /* Disable the PN if appropriate */ 873 /* Disable the PN if appropriate */
865 squash_the_stupid_serial_number(c); 874 squash_the_stupid_serial_number(c);
866 875
876 /* Set up SMEP/SMAP */
877 setup_smep(c);
878 setup_smap(c);
879
867 /* 880 /*
868 * The vendor-specific functions might have changed features. 881 * The vendor-specific functions might have changed features.
869 * Now we do "generic changes." 882 * Now we do "generic changes."
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void)
942#else 955#else
943 vgetcpu_set_mode(); 956 vgetcpu_set_mode();
944#endif 957#endif
945 if (boot_cpu_data.cpuid_level >= 2) 958 cpu_detect_tlb(&boot_cpu_data);
946 cpu_detect_tlb(&boot_cpu_data);
947} 959}
948 960
949void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 961void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1023 printk(KERN_CONT "%s ", vendor); 1035 printk(KERN_CONT "%s ", vendor);
1024 1036
1025 if (c->x86_model_id[0]) 1037 if (c->x86_model_id[0])
1026 printk(KERN_CONT "%s", c->x86_model_id); 1038 printk(KERN_CONT "%s", strim(c->x86_model_id));
1027 else 1039 else
1028 printk(KERN_CONT "%d86", c->x86); 1040 printk(KERN_CONT "%d86", c->x86);
1029 1041
1042 printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
1043
1030 if (c->x86_mask || c->cpuid_level >= 0) 1044 if (c->x86_mask || c->cpuid_level >= 0)
1031 printk(KERN_CONT " stepping %02x\n", c->x86_mask); 1045 printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
1032 else 1046 else
1033 printk(KERN_CONT "\n"); 1047 printk(KERN_CONT ")\n");
1034 1048
1035 print_cpu_msr(c); 1049 print_cpu_msr(c);
1036} 1050}
@@ -1113,11 +1127,10 @@ void syscall_init(void)
1113 1127
1114 /* Flags to clear on syscall */ 1128 /* Flags to clear on syscall */
1115 wrmsrl(MSR_SYSCALL_MASK, 1129 wrmsrl(MSR_SYSCALL_MASK,
1116 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); 1130 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
1131 X86_EFLAGS_IOPL|X86_EFLAGS_AC);
1117} 1132}
1118 1133
1119unsigned long kernel_eflags;
1120
1121/* 1134/*
1122 * Copies of the original ist values from the tss are only accessed during 1135 * Copies of the original ist values from the tss are only accessed during
1123 * debugging, no special alignment required. 1136 * debugging, no special alignment required.
@@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void)
1297 dbg_restore_debug_regs(); 1310 dbg_restore_debug_regs();
1298 1311
1299 fpu_init(); 1312 fpu_init();
1300 xsave_init();
1301
1302 raw_local_save_flags(kernel_eflags);
1303 1313
1304 if (is_uv_system()) 1314 if (is_uv_system())
1305 uv_cpu_init(); 1315 uv_cpu_init();
@@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void)
1352 dbg_restore_debug_regs(); 1362 dbg_restore_debug_regs();
1353 1363
1354 fpu_init(); 1364 fpu_init();
1355 xsave_init();
1356} 1365}
1357#endif 1366#endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce2980a5a..198e019a531a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
648 int i, j, n; 648 int i, j, n;
649 unsigned int regs[4]; 649 unsigned int regs[4];
650 unsigned char *desc = (unsigned char *)regs; 650 unsigned char *desc = (unsigned char *)regs;
651
652 if (c->cpuid_level < 2)
653 return;
654
651 /* Number of times to iterate */ 655 /* Number of times to iterate */
652 n = cpuid_eax(2) & 0xFF; 656 n = cpuid_eax(2) & 0xFF;
653 657
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
78} 78}
79 79
80static cpumask_var_t mce_inject_cpumask; 80static cpumask_var_t mce_inject_cpumask;
81static DEFINE_MUTEX(mce_inject_mutex);
81 82
82static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) 83static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
83{ 84{
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
194 put_online_cpus(); 195 put_online_cpus();
195 } else 196 } else
196#endif 197#endif
198 {
199 preempt_disable();
197 raise_local(); 200 raise_local();
201 preempt_enable();
202 }
198} 203}
199 204
200/* Error injection interface */ 205/* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
225 * so do it a jiffie or two later everywhere. 230 * so do it a jiffie or two later everywhere.
226 */ 231 */
227 schedule_timeout(2); 232 schedule_timeout(2);
233
234 mutex_lock(&mce_inject_mutex);
228 raise_mce(&m); 235 raise_mce(&m);
236 mutex_unlock(&mce_inject_mutex);
229 return usize; 237 return usize;
230} 238}
231 239
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
28 28
29extern struct mce_bank *mce_banks; 29extern struct mce_bank *mce_banks;
30 30
31#ifdef CONFIG_X86_MCE_INTEL
32unsigned long mce_intel_adjust_timer(unsigned long interval);
33void mce_intel_cmci_poll(void);
34void mce_intel_hcpu_update(unsigned long cpu);
35#else
36# define mce_intel_adjust_timer mce_adjust_timer_default
37static inline void mce_intel_cmci_poll(void) { }
38static inline void mce_intel_hcpu_update(unsigned long cpu) { }
39#endif
40
41void mce_timer_kick(unsigned long interval);
42
31#ifdef CONFIG_ACPI_APEI 43#ifdef CONFIG_ACPI_APEI
32int apei_write_mce(struct mce *m); 44int apei_write_mce(struct mce *m);
33ssize_t apei_read_mce(struct mce *m, u64 *record_id); 45ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
83int mce_cmci_disabled __read_mostly; 83int mce_cmci_disabled __read_mostly;
84int mce_ignore_ce __read_mostly; 84int mce_ignore_ce __read_mostly;
85int mce_ser __read_mostly; 85int mce_ser __read_mostly;
86int mce_bios_cmci_threshold __read_mostly;
86 87
87struct mce_bank *mce_banks __read_mostly; 88struct mce_bank *mce_banks __read_mostly;
88 89
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
1266static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ 1267static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
1267static DEFINE_PER_CPU(struct timer_list, mce_timer); 1268static DEFINE_PER_CPU(struct timer_list, mce_timer);
1268 1269
1270static unsigned long mce_adjust_timer_default(unsigned long interval)
1271{
1272 return interval;
1273}
1274
1275static unsigned long (*mce_adjust_timer)(unsigned long interval) =
1276 mce_adjust_timer_default;
1277
1269static void mce_timer_fn(unsigned long data) 1278static void mce_timer_fn(unsigned long data)
1270{ 1279{
1271 struct timer_list *t = &__get_cpu_var(mce_timer); 1280 struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
1276 if (mce_available(__this_cpu_ptr(&cpu_info))) { 1285 if (mce_available(__this_cpu_ptr(&cpu_info))) {
1277 machine_check_poll(MCP_TIMESTAMP, 1286 machine_check_poll(MCP_TIMESTAMP,
1278 &__get_cpu_var(mce_poll_banks)); 1287 &__get_cpu_var(mce_poll_banks));
1288 mce_intel_cmci_poll();
1279 } 1289 }
1280 1290
1281 /* 1291 /*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
1283 * polling interval, otherwise increase the polling interval. 1293 * polling interval, otherwise increase the polling interval.
1284 */ 1294 */
1285 iv = __this_cpu_read(mce_next_interval); 1295 iv = __this_cpu_read(mce_next_interval);
1286 if (mce_notify_irq()) 1296 if (mce_notify_irq()) {
1287 iv = max(iv / 2, (unsigned long) HZ/100); 1297 iv = max(iv / 2, (unsigned long) HZ/100);
1288 else 1298 } else {
1289 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); 1299 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
1300 iv = mce_adjust_timer(iv);
1301 }
1290 __this_cpu_write(mce_next_interval, iv); 1302 __this_cpu_write(mce_next_interval, iv);
1303 /* Might have become 0 after CMCI storm subsided */
1304 if (iv) {
1305 t->expires = jiffies + iv;
1306 add_timer_on(t, smp_processor_id());
1307 }
1308}
1291 1309
1292 t->expires = jiffies + iv; 1310/*
1293 add_timer_on(t, smp_processor_id()); 1311 * Ensure that the timer is firing in @interval from now.
1312 */
1313void mce_timer_kick(unsigned long interval)
1314{
1315 struct timer_list *t = &__get_cpu_var(mce_timer);
1316 unsigned long when = jiffies + interval;
1317 unsigned long iv = __this_cpu_read(mce_next_interval);
1318
1319 if (timer_pending(t)) {
1320 if (time_before(when, t->expires))
1321 mod_timer_pinned(t, when);
1322 } else {
1323 t->expires = round_jiffies(when);
1324 add_timer_on(t, smp_processor_id());
1325 }
1326 if (interval < iv)
1327 __this_cpu_write(mce_next_interval, interval);
1294} 1328}
1295 1329
1296/* Must not be called in IRQ context where del_timer_sync() can deadlock */ 1330/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1585 switch (c->x86_vendor) { 1619 switch (c->x86_vendor) {
1586 case X86_VENDOR_INTEL: 1620 case X86_VENDOR_INTEL:
1587 mce_intel_feature_init(c); 1621 mce_intel_feature_init(c);
1622 mce_adjust_timer = mce_intel_adjust_timer;
1588 break; 1623 break;
1589 case X86_VENDOR_AMD: 1624 case X86_VENDOR_AMD:
1590 mce_amd_feature_init(c); 1625 mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1594 } 1629 }
1595} 1630}
1596 1631
1597static void __mcheck_cpu_init_timer(void) 1632static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1598{ 1633{
1599 struct timer_list *t = &__get_cpu_var(mce_timer); 1634 unsigned long iv = mce_adjust_timer(check_interval * HZ);
1600 unsigned long iv = check_interval * HZ;
1601 1635
1602 setup_timer(t, mce_timer_fn, smp_processor_id()); 1636 __this_cpu_write(mce_next_interval, iv);
1603 1637
1604 if (mce_ignore_ce) 1638 if (mce_ignore_ce || !iv)
1605 return; 1639 return;
1606 1640
1607 __this_cpu_write(mce_next_interval, iv);
1608 if (!iv)
1609 return;
1610 t->expires = round_jiffies(jiffies + iv); 1641 t->expires = round_jiffies(jiffies + iv);
1611 add_timer_on(t, smp_processor_id()); 1642 add_timer_on(t, smp_processor_id());
1612} 1643}
1613 1644
1645static void __mcheck_cpu_init_timer(void)
1646{
1647 struct timer_list *t = &__get_cpu_var(mce_timer);
1648 unsigned int cpu = smp_processor_id();
1649
1650 setup_timer(t, mce_timer_fn, cpu);
1651 mce_start_timer(cpu, t);
1652}
1653
1614/* Handle unconfigured int18 (should never happen) */ 1654/* Handle unconfigured int18 (should never happen) */
1615static void unexpected_machine_check(struct pt_regs *regs, long error_code) 1655static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1616{ 1656{
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
1907 * check, or 0 to not wait 1947 * check, or 0 to not wait
1908 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 1948 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
1909 * mce=nobootlog Don't log MCEs from before booting. 1949 * mce=nobootlog Don't log MCEs from before booting.
1950 * mce=bios_cmci_threshold Don't program the CMCI threshold
1910 */ 1951 */
1911static int __init mcheck_enable(char *str) 1952static int __init mcheck_enable(char *str)
1912{ 1953{
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
1926 mce_ignore_ce = 1; 1967 mce_ignore_ce = 1;
1927 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1968 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1928 mce_bootlog = (str[0] == 'b'); 1969 mce_bootlog = (str[0] == 'b');
1970 else if (!strcmp(str, "bios_cmci_threshold"))
1971 mce_bios_cmci_threshold = 1;
1929 else if (isdigit(str[0])) { 1972 else if (isdigit(str[0])) {
1930 get_option(&str, &tolerant); 1973 get_option(&str, &tolerant);
1931 if (*str == ',') { 1974 if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
2166 &mce_cmci_disabled 2209 &mce_cmci_disabled
2167}; 2210};
2168 2211
2212static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
2213 __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
2214 &mce_bios_cmci_threshold
2215};
2216
2169static struct device_attribute *mce_device_attrs[] = { 2217static struct device_attribute *mce_device_attrs[] = {
2170 &dev_attr_tolerant.attr, 2218 &dev_attr_tolerant.attr,
2171 &dev_attr_check_interval.attr, 2219 &dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
2174 &dev_attr_dont_log_ce.attr, 2222 &dev_attr_dont_log_ce.attr,
2175 &dev_attr_ignore_ce.attr, 2223 &dev_attr_ignore_ce.attr,
2176 &dev_attr_cmci_disabled.attr, 2224 &dev_attr_cmci_disabled.attr,
2225 &dev_attr_bios_cmci_threshold.attr,
2177 NULL 2226 NULL
2178}; 2227};
2179 2228
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2294 unsigned int cpu = (unsigned long)hcpu; 2343 unsigned int cpu = (unsigned long)hcpu;
2295 struct timer_list *t = &per_cpu(mce_timer, cpu); 2344 struct timer_list *t = &per_cpu(mce_timer, cpu);
2296 2345
2297 switch (action) { 2346 switch (action & ~CPU_TASKS_FROZEN) {
2298 case CPU_ONLINE: 2347 case CPU_ONLINE:
2299 case CPU_ONLINE_FROZEN:
2300 mce_device_create(cpu); 2348 mce_device_create(cpu);
2301 if (threshold_cpu_callback) 2349 if (threshold_cpu_callback)
2302 threshold_cpu_callback(action, cpu); 2350 threshold_cpu_callback(action, cpu);
2303 break; 2351 break;
2304 case CPU_DEAD: 2352 case CPU_DEAD:
2305 case CPU_DEAD_FROZEN:
2306 if (threshold_cpu_callback) 2353 if (threshold_cpu_callback)
2307 threshold_cpu_callback(action, cpu); 2354 threshold_cpu_callback(action, cpu);
2308 mce_device_remove(cpu); 2355 mce_device_remove(cpu);
2356 mce_intel_hcpu_update(cpu);
2309 break; 2357 break;
2310 case CPU_DOWN_PREPARE: 2358 case CPU_DOWN_PREPARE:
2311 case CPU_DOWN_PREPARE_FROZEN:
2312 del_timer_sync(t);
2313 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 2359 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
2360 del_timer_sync(t);
2314 break; 2361 break;
2315 case CPU_DOWN_FAILED: 2362 case CPU_DOWN_FAILED:
2316 case CPU_DOWN_FAILED_FROZEN:
2317 if (!mce_ignore_ce && check_interval) {
2318 t->expires = round_jiffies(jiffies +
2319 per_cpu(mce_next_interval, cpu));
2320 add_timer_on(t, cpu);
2321 }
2322 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2363 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2364 mce_start_timer(cpu, t);
2323 break; 2365 break;
2324 case CPU_POST_DEAD: 2366 }
2367
2368 if (action == CPU_POST_DEAD) {
2325 /* intentionally ignoring frozen here */ 2369 /* intentionally ignoring frozen here */
2326 cmci_rediscover(cpu); 2370 cmci_rediscover(cpu);
2327 break;
2328 } 2371 }
2372
2329 return NOTIFY_OK; 2373 return NOTIFY_OK;
2330} 2374}
2331 2375
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffc..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/mce.h> 16#include <asm/mce.h>
17 17
18#include "mce-internal.h"
19
18/* 20/*
19 * Support for Intel Correct Machine Check Interrupts. This allows 21 * Support for Intel Correct Machine Check Interrupts. This allows
20 * the CPU to raise an interrupt when a corrected machine check happened. 22 * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
30 */ 32 */
31static DEFINE_RAW_SPINLOCK(cmci_discover_lock); 33static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
32 34
33#define CMCI_THRESHOLD 1 35#define CMCI_THRESHOLD 1
36#define CMCI_POLL_INTERVAL (30 * HZ)
37#define CMCI_STORM_INTERVAL (1 * HZ)
38#define CMCI_STORM_THRESHOLD 15
39
40static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
41static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
42static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
43
44enum {
45 CMCI_STORM_NONE,
46 CMCI_STORM_ACTIVE,
47 CMCI_STORM_SUBSIDED,
48};
49
50static atomic_t cmci_storm_on_cpus;
34 51
35static int cmci_supported(int *banks) 52static int cmci_supported(int *banks)
36{ 53{
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
53 return !!(cap & MCG_CMCI_P); 70 return !!(cap & MCG_CMCI_P);
54} 71}
55 72
73void mce_intel_cmci_poll(void)
74{
75 if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
76 return;
77 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
78}
79
80void mce_intel_hcpu_update(unsigned long cpu)
81{
82 if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
83 atomic_dec(&cmci_storm_on_cpus);
84
85 per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
86}
87
88unsigned long mce_intel_adjust_timer(unsigned long interval)
89{
90 int r;
91
92 if (interval < CMCI_POLL_INTERVAL)
93 return interval;
94
95 switch (__this_cpu_read(cmci_storm_state)) {
96 case CMCI_STORM_ACTIVE:
97 /*
98 * We switch back to interrupt mode once the poll timer has
99 * silenced itself. That means no events recorded and the
100 * timer interval is back to our poll interval.
101 */
102 __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
103 r = atomic_sub_return(1, &cmci_storm_on_cpus);
104 if (r == 0)
105 pr_notice("CMCI storm subsided: switching to interrupt mode\n");
106 /* FALLTHROUGH */
107
108 case CMCI_STORM_SUBSIDED:
109 /*
110 * We wait for all cpus to go back to SUBSIDED
111 * state. When that happens we switch back to
112 * interrupt mode.
113 */
114 if (!atomic_read(&cmci_storm_on_cpus)) {
115 __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
116 cmci_reenable();
117 cmci_recheck();
118 }
119 return CMCI_POLL_INTERVAL;
120 default:
121 /*
122 * We have shiny weather. Let the poll do whatever it
123 * thinks.
124 */
125 return interval;
126 }
127}
128
129static bool cmci_storm_detect(void)
130{
131 unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
132 unsigned long ts = __this_cpu_read(cmci_time_stamp);
133 unsigned long now = jiffies;
134 int r;
135
136 if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
137 return true;
138
139 if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
140 cnt++;
141 } else {
142 cnt = 1;
143 __this_cpu_write(cmci_time_stamp, now);
144 }
145 __this_cpu_write(cmci_storm_cnt, cnt);
146
147 if (cnt <= CMCI_STORM_THRESHOLD)
148 return false;
149
150 cmci_clear();
151 __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
152 r = atomic_add_return(1, &cmci_storm_on_cpus);
153 mce_timer_kick(CMCI_POLL_INTERVAL);
154
155 if (r == 1)
156 pr_notice("CMCI storm detected: switching to poll mode\n");
157 return true;
158}
159
56/* 160/*
57 * The interrupt handler. This is called on every event. 161 * The interrupt handler. This is called on every event.
58 * Just call the poller directly to log any events. 162 * Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
61 */ 165 */
62static void intel_threshold_interrupt(void) 166static void intel_threshold_interrupt(void)
63{ 167{
168 if (cmci_storm_detect())
169 return;
64 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); 170 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
65 mce_notify_irq(); 171 mce_notify_irq();
66} 172}
67 173
68static void print_update(char *type, int *hdr, int num)
69{
70 if (*hdr == 0)
71 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
72 *hdr = 1;
73 printk(KERN_CONT " %s:%d", type, num);
74}
75
76/* 174/*
77 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks 175 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
78 * on this CPU. Use the algorithm recommended in the SDM to discover shared 176 * on this CPU. Use the algorithm recommended in the SDM to discover shared
79 * banks. 177 * banks.
80 */ 178 */
81static void cmci_discover(int banks, int boot) 179static void cmci_discover(int banks)
82{ 180{
83 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); 181 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
84 unsigned long flags; 182 unsigned long flags;
85 int hdr = 0;
86 int i; 183 int i;
184 int bios_wrong_thresh = 0;
87 185
88 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 186 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
89 for (i = 0; i < banks; i++) { 187 for (i = 0; i < banks; i++) {
90 u64 val; 188 u64 val;
189 int bios_zero_thresh = 0;
91 190
92 if (test_bit(i, owned)) 191 if (test_bit(i, owned))
93 continue; 192 continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
96 195
97 /* Already owned by someone else? */ 196 /* Already owned by someone else? */
98 if (val & MCI_CTL2_CMCI_EN) { 197 if (val & MCI_CTL2_CMCI_EN) {
99 if (test_and_clear_bit(i, owned) && !boot) 198 clear_bit(i, owned);
100 print_update("SHD", &hdr, i);
101 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 199 __clear_bit(i, __get_cpu_var(mce_poll_banks));
102 continue; 200 continue;
103 } 201 }
104 202
105 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 203 if (!mce_bios_cmci_threshold) {
106 val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; 204 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
205 val |= CMCI_THRESHOLD;
206 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
207 /*
208 * If bios_cmci_threshold boot option was specified
209 * but the threshold is zero, we'll try to initialize
210 * it to 1.
211 */
212 bios_zero_thresh = 1;
213 val |= CMCI_THRESHOLD;
214 }
215
216 val |= MCI_CTL2_CMCI_EN;
107 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 217 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
108 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 218 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
109 219
110 /* Did the enable bit stick? -- the bank supports CMCI */ 220 /* Did the enable bit stick? -- the bank supports CMCI */
111 if (val & MCI_CTL2_CMCI_EN) { 221 if (val & MCI_CTL2_CMCI_EN) {
112 if (!test_and_set_bit(i, owned) && !boot) 222 set_bit(i, owned);
113 print_update("CMCI", &hdr, i);
114 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 223 __clear_bit(i, __get_cpu_var(mce_poll_banks));
224 /*
225 * We are able to set thresholds for some banks that
226 * had a threshold of 0. This means the BIOS has not
227 * set the thresholds properly or does not work with
228 * this boot option. Note down now and report later.
229 */
230 if (mce_bios_cmci_threshold && bios_zero_thresh &&
231 (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
232 bios_wrong_thresh = 1;
115 } else { 233 } else {
116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 234 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
117 } 235 }
118 } 236 }
119 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 237 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
120 if (hdr) 238 if (mce_bios_cmci_threshold && bios_wrong_thresh) {
121 printk(KERN_CONT "\n"); 239 pr_info_once(
240 "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
241 pr_info_once(
242 "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
243 }
122} 244}
123 245
124/* 246/*
@@ -156,7 +278,7 @@ void cmci_clear(void)
156 continue; 278 continue;
157 /* Disable CMCI */ 279 /* Disable CMCI */
158 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 280 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
159 val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); 281 val &= ~MCI_CTL2_CMCI_EN;
160 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 282 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
161 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 283 __clear_bit(i, __get_cpu_var(mce_banks_owned));
162 } 284 }
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
186 continue; 308 continue;
187 /* Recheck banks in case CPUs don't all have the same */ 309 /* Recheck banks in case CPUs don't all have the same */
188 if (cmci_supported(&banks)) 310 if (cmci_supported(&banks))
189 cmci_discover(banks, 0); 311 cmci_discover(banks);
190 } 312 }
191 313
192 set_cpus_allowed_ptr(current, old); 314 set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
200{ 322{
201 int banks; 323 int banks;
202 if (cmci_supported(&banks)) 324 if (cmci_supported(&banks))
203 cmci_discover(banks, 0); 325 cmci_discover(banks);
204} 326}
205 327
206static void intel_init_cmci(void) 328static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
211 return; 333 return;
212 334
213 mce_threshold_vector = intel_threshold_interrupt; 335 mce_threshold_vector = intel_threshold_interrupt;
214 cmci_discover(banks, 1); 336 cmci_discover(banks);
215 /* 337 /*
216 * For CPU #0 this runs with still disabled APIC, but that's 338 * For CPU #0 this runs with still disabled APIC, but that's
217 * ok because only the vector is set up. We still do another 339 * ok because only the vector is set up. We still do another
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index c7b3fe2d72e0..091972ef49de 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -8,7 +8,10 @@
8open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; 8open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
9open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; 9open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
10 10
11print OUT "#include <asm/cpufeature.h>\n\n"; 11print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
12print OUT "#include <asm/cpufeature.h>\n";
13print OUT "#endif\n";
14print OUT "\n";
12print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; 15print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
13 16
14%features = (); 17%features = ();
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 38e4894165b9..99d96a4978b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1950,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
1950static struct intel_uncore_box * 1950static struct intel_uncore_box *
1951uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) 1951uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
1952{ 1952{
1953 static struct intel_uncore_box *box; 1953 struct intel_uncore_box *box;
1954 1954
1955 box = *per_cpu_ptr(pmu->box, cpu); 1955 box = *per_cpu_ptr(pmu->box, cpu);
1956 if (box) 1956 if (box)
@@ -2347,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event)
2347 return ret; 2347 return ret;
2348} 2348}
2349 2349
2350static ssize_t uncore_get_attr_cpumask(struct device *dev,
2351 struct device_attribute *attr, char *buf)
2352{
2353 int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
2354
2355 buf[n++] = '\n';
2356 buf[n] = '\0';
2357 return n;
2358}
2359
2360static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
2361
2362static struct attribute *uncore_pmu_attrs[] = {
2363 &dev_attr_cpumask.attr,
2364 NULL,
2365};
2366
2367static struct attribute_group uncore_pmu_attr_group = {
2368 .attrs = uncore_pmu_attrs,
2369};
2370
2350static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) 2371static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
2351{ 2372{
2352 int ret; 2373 int ret;
@@ -2384,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
2384 free_percpu(type->pmus[i].box); 2405 free_percpu(type->pmus[i].box);
2385 kfree(type->pmus); 2406 kfree(type->pmus);
2386 type->pmus = NULL; 2407 type->pmus = NULL;
2387 kfree(type->attr_groups[1]); 2408 kfree(type->events_group);
2388 type->attr_groups[1] = NULL; 2409 type->events_group = NULL;
2389} 2410}
2390 2411
2391static void __init uncore_types_exit(struct intel_uncore_type **types) 2412static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2437,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
2437 for (j = 0; j < i; j++) 2458 for (j = 0; j < i; j++)
2438 attrs[j] = &type->event_descs[j].attr.attr; 2459 attrs[j] = &type->event_descs[j].attr.attr;
2439 2460
2440 type->attr_groups[1] = events_group; 2461 type->events_group = events_group;
2441 } 2462 }
2442 2463
2464 type->pmu_group = &uncore_pmu_attr_group;
2443 type->pmus = pmus; 2465 type->pmus = pmus;
2444 return 0; 2466 return 0;
2445fail: 2467fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c1856aac..e68a4550e952 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
369 struct intel_uncore_pmu *pmus; 369 struct intel_uncore_pmu *pmus;
370 struct intel_uncore_ops *ops; 370 struct intel_uncore_ops *ops;
371 struct uncore_event_desc *event_descs; 371 struct uncore_event_desc *event_descs;
372 const struct attribute_group *attr_groups[3]; 372 const struct attribute_group *attr_groups[4];
373}; 373};
374 374
375#define format_group attr_groups[0] 375#define pmu_group attr_groups[0]
376#define format_group attr_groups[1]
377#define events_group attr_groups[2]
376 378
377struct intel_uncore_ops { 379struct intel_uncore_ops {
378 void (*init_box)(struct intel_uncore_box *); 380 void (*init_box)(struct intel_uncore_box *);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 8022c6681485..fbd895562292 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
140 140
141static void *c_start(struct seq_file *m, loff_t *pos) 141static void *c_start(struct seq_file *m, loff_t *pos)
142{ 142{
143 if (*pos == 0) /* just in case, cpu 0 is not the first */ 143 *pos = cpumask_next(*pos - 1, cpu_online_mask);
144 *pos = cpumask_first(cpu_online_mask);
145 else
146 *pos = cpumask_next(*pos - 1, cpu_online_mask);
147 if ((*pos) < nr_cpu_ids) 144 if ((*pos) < nr_cpu_ids)
148 return &cpu_data(*pos); 145 return &cpu_data(*pos);
149 return NULL; 146 return NULL;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 39472dd2323f..60c78917190c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
199 goto out_chrdev; 199 goto out_chrdev;
200 } 200 }
201 cpuid_class->devnode = cpuid_devnode; 201 cpuid_class->devnode = cpuid_devnode;
202 get_online_cpus();
202 for_each_online_cpu(i) { 203 for_each_online_cpu(i) {
203 err = cpuid_device_create(i); 204 err = cpuid_device_create(i);
204 if (err != 0) 205 if (err != 0)
205 goto out_class; 206 goto out_class;
206 } 207 }
207 register_hotcpu_notifier(&cpuid_class_cpu_notifier); 208 register_hotcpu_notifier(&cpuid_class_cpu_notifier);
209 put_online_cpus();
208 210
209 err = 0; 211 err = 0;
210 goto out; 212 goto out;
@@ -214,6 +216,7 @@ out_class:
214 for_each_online_cpu(i) { 216 for_each_online_cpu(i) {
215 cpuid_device_destroy(i); 217 cpuid_device_destroy(i);
216 } 218 }
219 put_online_cpus();
217 class_destroy(cpuid_class); 220 class_destroy(cpuid_class);
218out_chrdev: 221out_chrdev:
219 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); 222 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
225{ 228{
226 int cpu = 0; 229 int cpu = 0;
227 230
231 get_online_cpus();
228 for_each_online_cpu(cpu) 232 for_each_online_cpu(cpu)
229 cpuid_device_destroy(cpu); 233 cpuid_device_destroy(cpu);
230 class_destroy(cpuid_class); 234 class_destroy(cpuid_class);
231 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); 235 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
232 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); 236 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
237 put_online_cpus();
233} 238}
234 239
235module_init(cpuid_init); 240module_init(cpuid_init);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ae2ced4a874..b1581527a236 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
342 .xlate = ioapic_xlate, 342 .xlate = ioapic_xlate,
343}; 343};
344 344
345static void dt_add_ioapic_domain(unsigned int ioapic_num,
346 struct device_node *np)
347{
348 struct irq_domain *id;
349 struct mp_ioapic_gsi *gsi_cfg;
350 int ret;
351 int num;
352
353 gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
354 num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
355
356 id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
357 (void *)ioapic_num);
358 BUG_ON(!id);
359 if (gsi_cfg->gsi_base == 0) {
360 /*
361 * The first NR_IRQS_LEGACY irq descs are allocated in
362 * early_irq_init() and need just a mapping. The
363 * remaining irqs need both. All of them are preallocated
364 * and assigned so we can keep the 1:1 mapping which the ioapic
365 * is having.
366 */
367 ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
368 if (ret)
369 pr_err("Error mapping legacy IRQs: %d\n", ret);
370
371 if (num > NR_IRQS_LEGACY) {
372 ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
373 NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
374 if (ret)
375 pr_err("Error creating mapping for the "
376 "remaining IRQs: %d\n", ret);
377 }
378 irq_set_default_host(id);
379 } else {
380 ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
381 if (ret)
382 pr_err("Error creating IRQ mapping: %d\n", ret);
383 }
384}
385
345static void __init ioapic_add_ofnode(struct device_node *np) 386static void __init ioapic_add_ofnode(struct device_node *np)
346{ 387{
347 struct resource r; 388 struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)
356 397
357 for (i = 0; i < nr_ioapics; i++) { 398 for (i = 0; i < nr_ioapics; i++) {
358 if (r.start == mpc_ioapic_addr(i)) { 399 if (r.start == mpc_ioapic_addr(i)) {
359 struct irq_domain *id; 400 dt_add_ioapic_domain(i, np);
360 struct mp_ioapic_gsi *gsi_cfg;
361
362 gsi_cfg = mp_ioapic_gsi_routing(i);
363
364 id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
365 &ioapic_irq_domain_ops,
366 (void*)i);
367 BUG_ON(!id);
368 return; 401 return;
369 } 402 }
370 } 403 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f28837476..0750e3ba87c0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -57,6 +57,7 @@
57#include <asm/cpufeature.h> 57#include <asm/cpufeature.h>
58#include <asm/alternative-asm.h> 58#include <asm/alternative-asm.h>
59#include <asm/asm.h> 59#include <asm/asm.h>
60#include <asm/smap.h>
60 61
61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 62/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
62#include <linux/elf-em.h> 63#include <linux/elf-em.h>
@@ -407,7 +408,9 @@ sysenter_past_esp:
407 */ 408 */
408 cmpl $__PAGE_OFFSET-3,%ebp 409 cmpl $__PAGE_OFFSET-3,%ebp
409 jae syscall_fault 410 jae syscall_fault
411 ASM_STAC
4101: movl (%ebp),%ebp 4121: movl (%ebp),%ebp
413 ASM_CLAC
411 movl %ebp,PT_EBP(%esp) 414 movl %ebp,PT_EBP(%esp)
412 _ASM_EXTABLE(1b,syscall_fault) 415 _ASM_EXTABLE(1b,syscall_fault)
413 416
@@ -488,6 +491,7 @@ ENDPROC(ia32_sysenter_target)
488 # system call handler stub 491 # system call handler stub
489ENTRY(system_call) 492ENTRY(system_call)
490 RING0_INT_FRAME # can't unwind into user space anyway 493 RING0_INT_FRAME # can't unwind into user space anyway
494 ASM_CLAC
491 pushl_cfi %eax # save orig_eax 495 pushl_cfi %eax # save orig_eax
492 SAVE_ALL 496 SAVE_ALL
493 GET_THREAD_INFO(%ebp) 497 GET_THREAD_INFO(%ebp)
@@ -670,6 +674,7 @@ END(syscall_exit_work)
670 674
671 RING0_INT_FRAME # can't unwind into user space anyway 675 RING0_INT_FRAME # can't unwind into user space anyway
672syscall_fault: 676syscall_fault:
677 ASM_CLAC
673 GET_THREAD_INFO(%ebp) 678 GET_THREAD_INFO(%ebp)
674 movl $-EFAULT,PT_EAX(%esp) 679 movl $-EFAULT,PT_EAX(%esp)
675 jmp resume_userspace 680 jmp resume_userspace
@@ -825,6 +830,7 @@ END(interrupt)
825 */ 830 */
826 .p2align CONFIG_X86_L1_CACHE_SHIFT 831 .p2align CONFIG_X86_L1_CACHE_SHIFT
827common_interrupt: 832common_interrupt:
833 ASM_CLAC
828 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ 834 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
829 SAVE_ALL 835 SAVE_ALL
830 TRACE_IRQS_OFF 836 TRACE_IRQS_OFF
@@ -841,6 +847,7 @@ ENDPROC(common_interrupt)
841#define BUILD_INTERRUPT3(name, nr, fn) \ 847#define BUILD_INTERRUPT3(name, nr, fn) \
842ENTRY(name) \ 848ENTRY(name) \
843 RING0_INT_FRAME; \ 849 RING0_INT_FRAME; \
850 ASM_CLAC; \
844 pushl_cfi $~(nr); \ 851 pushl_cfi $~(nr); \
845 SAVE_ALL; \ 852 SAVE_ALL; \
846 TRACE_IRQS_OFF \ 853 TRACE_IRQS_OFF \
@@ -857,6 +864,7 @@ ENDPROC(name)
857 864
858ENTRY(coprocessor_error) 865ENTRY(coprocessor_error)
859 RING0_INT_FRAME 866 RING0_INT_FRAME
867 ASM_CLAC
860 pushl_cfi $0 868 pushl_cfi $0
861 pushl_cfi $do_coprocessor_error 869 pushl_cfi $do_coprocessor_error
862 jmp error_code 870 jmp error_code
@@ -865,6 +873,7 @@ END(coprocessor_error)
865 873
866ENTRY(simd_coprocessor_error) 874ENTRY(simd_coprocessor_error)
867 RING0_INT_FRAME 875 RING0_INT_FRAME
876 ASM_CLAC
868 pushl_cfi $0 877 pushl_cfi $0
869#ifdef CONFIG_X86_INVD_BUG 878#ifdef CONFIG_X86_INVD_BUG
870 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ 879 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
@@ -886,6 +895,7 @@ END(simd_coprocessor_error)
886 895
887ENTRY(device_not_available) 896ENTRY(device_not_available)
888 RING0_INT_FRAME 897 RING0_INT_FRAME
898 ASM_CLAC
889 pushl_cfi $-1 # mark this as an int 899 pushl_cfi $-1 # mark this as an int
890 pushl_cfi $do_device_not_available 900 pushl_cfi $do_device_not_available
891 jmp error_code 901 jmp error_code
@@ -906,6 +916,7 @@ END(native_irq_enable_sysexit)
906 916
907ENTRY(overflow) 917ENTRY(overflow)
908 RING0_INT_FRAME 918 RING0_INT_FRAME
919 ASM_CLAC
909 pushl_cfi $0 920 pushl_cfi $0
910 pushl_cfi $do_overflow 921 pushl_cfi $do_overflow
911 jmp error_code 922 jmp error_code
@@ -914,6 +925,7 @@ END(overflow)
914 925
915ENTRY(bounds) 926ENTRY(bounds)
916 RING0_INT_FRAME 927 RING0_INT_FRAME
928 ASM_CLAC
917 pushl_cfi $0 929 pushl_cfi $0
918 pushl_cfi $do_bounds 930 pushl_cfi $do_bounds
919 jmp error_code 931 jmp error_code
@@ -922,6 +934,7 @@ END(bounds)
922 934
923ENTRY(invalid_op) 935ENTRY(invalid_op)
924 RING0_INT_FRAME 936 RING0_INT_FRAME
937 ASM_CLAC
925 pushl_cfi $0 938 pushl_cfi $0
926 pushl_cfi $do_invalid_op 939 pushl_cfi $do_invalid_op
927 jmp error_code 940 jmp error_code
@@ -930,6 +943,7 @@ END(invalid_op)
930 943
931ENTRY(coprocessor_segment_overrun) 944ENTRY(coprocessor_segment_overrun)
932 RING0_INT_FRAME 945 RING0_INT_FRAME
946 ASM_CLAC
933 pushl_cfi $0 947 pushl_cfi $0
934 pushl_cfi $do_coprocessor_segment_overrun 948 pushl_cfi $do_coprocessor_segment_overrun
935 jmp error_code 949 jmp error_code
@@ -938,6 +952,7 @@ END(coprocessor_segment_overrun)
938 952
939ENTRY(invalid_TSS) 953ENTRY(invalid_TSS)
940 RING0_EC_FRAME 954 RING0_EC_FRAME
955 ASM_CLAC
941 pushl_cfi $do_invalid_TSS 956 pushl_cfi $do_invalid_TSS
942 jmp error_code 957 jmp error_code
943 CFI_ENDPROC 958 CFI_ENDPROC
@@ -945,6 +960,7 @@ END(invalid_TSS)
945 960
946ENTRY(segment_not_present) 961ENTRY(segment_not_present)
947 RING0_EC_FRAME 962 RING0_EC_FRAME
963 ASM_CLAC
948 pushl_cfi $do_segment_not_present 964 pushl_cfi $do_segment_not_present
949 jmp error_code 965 jmp error_code
950 CFI_ENDPROC 966 CFI_ENDPROC
@@ -952,6 +968,7 @@ END(segment_not_present)
952 968
953ENTRY(stack_segment) 969ENTRY(stack_segment)
954 RING0_EC_FRAME 970 RING0_EC_FRAME
971 ASM_CLAC
955 pushl_cfi $do_stack_segment 972 pushl_cfi $do_stack_segment
956 jmp error_code 973 jmp error_code
957 CFI_ENDPROC 974 CFI_ENDPROC
@@ -959,6 +976,7 @@ END(stack_segment)
959 976
960ENTRY(alignment_check) 977ENTRY(alignment_check)
961 RING0_EC_FRAME 978 RING0_EC_FRAME
979 ASM_CLAC
962 pushl_cfi $do_alignment_check 980 pushl_cfi $do_alignment_check
963 jmp error_code 981 jmp error_code
964 CFI_ENDPROC 982 CFI_ENDPROC
@@ -966,6 +984,7 @@ END(alignment_check)
966 984
967ENTRY(divide_error) 985ENTRY(divide_error)
968 RING0_INT_FRAME 986 RING0_INT_FRAME
987 ASM_CLAC
969 pushl_cfi $0 # no error code 988 pushl_cfi $0 # no error code
970 pushl_cfi $do_divide_error 989 pushl_cfi $do_divide_error
971 jmp error_code 990 jmp error_code
@@ -975,6 +994,7 @@ END(divide_error)
975#ifdef CONFIG_X86_MCE 994#ifdef CONFIG_X86_MCE
976ENTRY(machine_check) 995ENTRY(machine_check)
977 RING0_INT_FRAME 996 RING0_INT_FRAME
997 ASM_CLAC
978 pushl_cfi $0 998 pushl_cfi $0
979 pushl_cfi machine_check_vector 999 pushl_cfi machine_check_vector
980 jmp error_code 1000 jmp error_code
@@ -984,6 +1004,7 @@ END(machine_check)
984 1004
985ENTRY(spurious_interrupt_bug) 1005ENTRY(spurious_interrupt_bug)
986 RING0_INT_FRAME 1006 RING0_INT_FRAME
1007 ASM_CLAC
987 pushl_cfi $0 1008 pushl_cfi $0
988 pushl_cfi $do_spurious_interrupt_bug 1009 pushl_cfi $do_spurious_interrupt_bug
989 jmp error_code 1010 jmp error_code
@@ -1109,17 +1130,21 @@ ENTRY(ftrace_caller)
1109 pushl %eax 1130 pushl %eax
1110 pushl %ecx 1131 pushl %ecx
1111 pushl %edx 1132 pushl %edx
1112 movl 0xc(%esp), %eax 1133 pushl $0 /* Pass NULL as regs pointer */
1134 movl 4*4(%esp), %eax
1113 movl 0x4(%ebp), %edx 1135 movl 0x4(%ebp), %edx
1136 leal function_trace_op, %ecx
1114 subl $MCOUNT_INSN_SIZE, %eax 1137 subl $MCOUNT_INSN_SIZE, %eax
1115 1138
1116.globl ftrace_call 1139.globl ftrace_call
1117ftrace_call: 1140ftrace_call:
1118 call ftrace_stub 1141 call ftrace_stub
1119 1142
1143 addl $4,%esp /* skip NULL pointer */
1120 popl %edx 1144 popl %edx
1121 popl %ecx 1145 popl %ecx
1122 popl %eax 1146 popl %eax
1147ftrace_ret:
1123#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1148#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1124.globl ftrace_graph_call 1149.globl ftrace_graph_call
1125ftrace_graph_call: 1150ftrace_graph_call:
@@ -1131,6 +1156,71 @@ ftrace_stub:
1131 ret 1156 ret
1132END(ftrace_caller) 1157END(ftrace_caller)
1133 1158
1159ENTRY(ftrace_regs_caller)
1160 pushf /* push flags before compare (in cs location) */
1161 cmpl $0, function_trace_stop
1162 jne ftrace_restore_flags
1163
1164 /*
1165 * i386 does not save SS and ESP when coming from kernel.
1166 * Instead, to get sp, &regs->sp is used (see ptrace.h).
1167 * Unfortunately, that means eflags must be at the same location
1168 * as the current return ip is. We move the return ip into the
1169 * ip location, and move flags into the return ip location.
1170 */
1171 pushl 4(%esp) /* save return ip into ip slot */
1172
1173 pushl $0 /* Load 0 into orig_ax */
1174 pushl %gs
1175 pushl %fs
1176 pushl %es
1177 pushl %ds
1178 pushl %eax
1179 pushl %ebp
1180 pushl %edi
1181 pushl %esi
1182 pushl %edx
1183 pushl %ecx
1184 pushl %ebx
1185
1186 movl 13*4(%esp), %eax /* Get the saved flags */
1187 movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
1188 /* clobbering return ip */
1189 movl $__KERNEL_CS,13*4(%esp)
1190
1191 movl 12*4(%esp), %eax /* Load ip (1st parameter) */
1192 subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
1193 movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
1194 leal function_trace_op, %ecx /* Load ftrace_ops into 3rd parameter */
1195 pushl %esp /* Save pt_regs as 4th parameter */
1196
1197GLOBAL(ftrace_regs_call)
1198 call ftrace_stub
1199
1200 addl $4, %esp /* Skip pt_regs */
1201 movl 14*4(%esp), %eax /* Move flags back into cs */
1202 movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
1203 movl 12*4(%esp), %eax /* Get return ip from regs->ip */
1204 movl %eax, 14*4(%esp) /* Put return ip back for ret */
1205
1206 popl %ebx
1207 popl %ecx
1208 popl %edx
1209 popl %esi
1210 popl %edi
1211 popl %ebp
1212 popl %eax
1213 popl %ds
1214 popl %es
1215 popl %fs
1216 popl %gs
1217 addl $8, %esp /* Skip orig_ax and ip */
1218 popf /* Pop flags at end (no addl to corrupt flags) */
1219 jmp ftrace_ret
1220
1221ftrace_restore_flags:
1222 popf
1223 jmp ftrace_stub
1134#else /* ! CONFIG_DYNAMIC_FTRACE */ 1224#else /* ! CONFIG_DYNAMIC_FTRACE */
1135 1225
1136ENTRY(mcount) 1226ENTRY(mcount)
@@ -1171,9 +1261,6 @@ END(mcount)
1171 1261
1172#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1262#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1173ENTRY(ftrace_graph_caller) 1263ENTRY(ftrace_graph_caller)
1174 cmpl $0, function_trace_stop
1175 jne ftrace_stub
1176
1177 pushl %eax 1264 pushl %eax
1178 pushl %ecx 1265 pushl %ecx
1179 pushl %edx 1266 pushl %edx
@@ -1207,6 +1294,7 @@ return_to_handler:
1207 1294
1208ENTRY(page_fault) 1295ENTRY(page_fault)
1209 RING0_EC_FRAME 1296 RING0_EC_FRAME
1297 ASM_CLAC
1210 pushl_cfi $do_page_fault 1298 pushl_cfi $do_page_fault
1211 ALIGN 1299 ALIGN
1212error_code: 1300error_code:
@@ -1279,6 +1367,7 @@ END(page_fault)
1279 1367
1280ENTRY(debug) 1368ENTRY(debug)
1281 RING0_INT_FRAME 1369 RING0_INT_FRAME
1370 ASM_CLAC
1282 cmpl $ia32_sysenter_target,(%esp) 1371 cmpl $ia32_sysenter_target,(%esp)
1283 jne debug_stack_correct 1372 jne debug_stack_correct
1284 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn 1373 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
@@ -1303,6 +1392,7 @@ END(debug)
1303 */ 1392 */
1304ENTRY(nmi) 1393ENTRY(nmi)
1305 RING0_INT_FRAME 1394 RING0_INT_FRAME
1395 ASM_CLAC
1306 pushl_cfi %eax 1396 pushl_cfi %eax
1307 movl %ss, %eax 1397 movl %ss, %eax
1308 cmpw $__ESPFIX_SS, %ax 1398 cmpw $__ESPFIX_SS, %ax
@@ -1373,6 +1463,7 @@ END(nmi)
1373 1463
1374ENTRY(int3) 1464ENTRY(int3)
1375 RING0_INT_FRAME 1465 RING0_INT_FRAME
1466 ASM_CLAC
1376 pushl_cfi $-1 # mark this as an int 1467 pushl_cfi $-1 # mark this as an int
1377 SAVE_ALL 1468 SAVE_ALL
1378 TRACE_IRQS_OFF 1469 TRACE_IRQS_OFF
@@ -1393,6 +1484,7 @@ END(general_protection)
1393#ifdef CONFIG_KVM_GUEST 1484#ifdef CONFIG_KVM_GUEST
1394ENTRY(async_page_fault) 1485ENTRY(async_page_fault)
1395 RING0_EC_FRAME 1486 RING0_EC_FRAME
1487 ASM_CLAC
1396 pushl_cfi $do_async_page_fault 1488 pushl_cfi $do_async_page_fault
1397 jmp error_code 1489 jmp error_code
1398 CFI_ENDPROC 1490 CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..44531acd9a81 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,8 @@
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <asm/asm.h> 58#include <asm/asm.h>
59#include <asm/rcu.h>
60#include <asm/smap.h>
59#include <linux/err.h> 61#include <linux/err.h>
60 62
61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 63/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -68,25 +70,51 @@
68 .section .entry.text, "ax" 70 .section .entry.text, "ax"
69 71
70#ifdef CONFIG_FUNCTION_TRACER 72#ifdef CONFIG_FUNCTION_TRACER
73
74#ifdef CC_USING_FENTRY
75# define function_hook __fentry__
76#else
77# define function_hook mcount
78#endif
79
71#ifdef CONFIG_DYNAMIC_FTRACE 80#ifdef CONFIG_DYNAMIC_FTRACE
72ENTRY(mcount) 81
82ENTRY(function_hook)
73 retq 83 retq
74END(mcount) 84END(function_hook)
85
86/* skip is set if stack has been adjusted */
87.macro ftrace_caller_setup skip=0
88 MCOUNT_SAVE_FRAME \skip
89
90 /* Load the ftrace_ops into the 3rd parameter */
91 leaq function_trace_op, %rdx
92
93 /* Load ip into the first parameter */
94 movq RIP(%rsp), %rdi
95 subq $MCOUNT_INSN_SIZE, %rdi
96 /* Load the parent_ip into the second parameter */
97#ifdef CC_USING_FENTRY
98 movq SS+16(%rsp), %rsi
99#else
100 movq 8(%rbp), %rsi
101#endif
102.endm
75 103
76ENTRY(ftrace_caller) 104ENTRY(ftrace_caller)
105 /* Check if tracing was disabled (quick check) */
77 cmpl $0, function_trace_stop 106 cmpl $0, function_trace_stop
78 jne ftrace_stub 107 jne ftrace_stub
79 108
80 MCOUNT_SAVE_FRAME 109 ftrace_caller_setup
81 110 /* regs go into 4th parameter (but make it NULL) */
82 movq 0x38(%rsp), %rdi 111 movq $0, %rcx
83 movq 8(%rbp), %rsi
84 subq $MCOUNT_INSN_SIZE, %rdi
85 112
86GLOBAL(ftrace_call) 113GLOBAL(ftrace_call)
87 call ftrace_stub 114 call ftrace_stub
88 115
89 MCOUNT_RESTORE_FRAME 116 MCOUNT_RESTORE_FRAME
117ftrace_return:
90 118
91#ifdef CONFIG_FUNCTION_GRAPH_TRACER 119#ifdef CONFIG_FUNCTION_GRAPH_TRACER
92GLOBAL(ftrace_graph_call) 120GLOBAL(ftrace_graph_call)
@@ -97,8 +125,78 @@ GLOBAL(ftrace_stub)
97 retq 125 retq
98END(ftrace_caller) 126END(ftrace_caller)
99 127
128ENTRY(ftrace_regs_caller)
129 /* Save the current flags before compare (in SS location)*/
130 pushfq
131
132 /* Check if tracing was disabled (quick check) */
133 cmpl $0, function_trace_stop
134 jne ftrace_restore_flags
135
136 /* skip=8 to skip flags saved in SS */
137 ftrace_caller_setup 8
138
139 /* Save the rest of pt_regs */
140 movq %r15, R15(%rsp)
141 movq %r14, R14(%rsp)
142 movq %r13, R13(%rsp)
143 movq %r12, R12(%rsp)
144 movq %r11, R11(%rsp)
145 movq %r10, R10(%rsp)
146 movq %rbp, RBP(%rsp)
147 movq %rbx, RBX(%rsp)
148 /* Copy saved flags */
149 movq SS(%rsp), %rcx
150 movq %rcx, EFLAGS(%rsp)
151 /* Kernel segments */
152 movq $__KERNEL_DS, %rcx
153 movq %rcx, SS(%rsp)
154 movq $__KERNEL_CS, %rcx
155 movq %rcx, CS(%rsp)
156 /* Stack - skipping return address */
157 leaq SS+16(%rsp), %rcx
158 movq %rcx, RSP(%rsp)
159
160 /* regs go into 4th parameter */
161 leaq (%rsp), %rcx
162
163GLOBAL(ftrace_regs_call)
164 call ftrace_stub
165
166 /* Copy flags back to SS, to restore them */
167 movq EFLAGS(%rsp), %rax
168 movq %rax, SS(%rsp)
169
170 /* Handlers can change the RIP */
171 movq RIP(%rsp), %rax
172 movq %rax, SS+8(%rsp)
173
174 /* restore the rest of pt_regs */
175 movq R15(%rsp), %r15
176 movq R14(%rsp), %r14
177 movq R13(%rsp), %r13
178 movq R12(%rsp), %r12
179 movq R10(%rsp), %r10
180 movq RBP(%rsp), %rbp
181 movq RBX(%rsp), %rbx
182
183 /* skip=8 to skip flags saved in SS */
184 MCOUNT_RESTORE_FRAME 8
185
186 /* Restore flags */
187 popfq
188
189 jmp ftrace_return
190ftrace_restore_flags:
191 popfq
192 jmp ftrace_stub
193
194END(ftrace_regs_caller)
195
196
100#else /* ! CONFIG_DYNAMIC_FTRACE */ 197#else /* ! CONFIG_DYNAMIC_FTRACE */
101ENTRY(mcount) 198
199ENTRY(function_hook)
102 cmpl $0, function_trace_stop 200 cmpl $0, function_trace_stop
103 jne ftrace_stub 201 jne ftrace_stub
104 202
@@ -119,8 +217,12 @@ GLOBAL(ftrace_stub)
119trace: 217trace:
120 MCOUNT_SAVE_FRAME 218 MCOUNT_SAVE_FRAME
121 219
122 movq 0x38(%rsp), %rdi 220 movq RIP(%rsp), %rdi
221#ifdef CC_USING_FENTRY
222 movq SS+16(%rsp), %rsi
223#else
123 movq 8(%rbp), %rsi 224 movq 8(%rbp), %rsi
225#endif
124 subq $MCOUNT_INSN_SIZE, %rdi 226 subq $MCOUNT_INSN_SIZE, %rdi
125 227
126 call *ftrace_trace_function 228 call *ftrace_trace_function
@@ -128,20 +230,22 @@ trace:
128 MCOUNT_RESTORE_FRAME 230 MCOUNT_RESTORE_FRAME
129 231
130 jmp ftrace_stub 232 jmp ftrace_stub
131END(mcount) 233END(function_hook)
132#endif /* CONFIG_DYNAMIC_FTRACE */ 234#endif /* CONFIG_DYNAMIC_FTRACE */
133#endif /* CONFIG_FUNCTION_TRACER */ 235#endif /* CONFIG_FUNCTION_TRACER */
134 236
135#ifdef CONFIG_FUNCTION_GRAPH_TRACER 237#ifdef CONFIG_FUNCTION_GRAPH_TRACER
136ENTRY(ftrace_graph_caller) 238ENTRY(ftrace_graph_caller)
137 cmpl $0, function_trace_stop
138 jne ftrace_stub
139
140 MCOUNT_SAVE_FRAME 239 MCOUNT_SAVE_FRAME
141 240
241#ifdef CC_USING_FENTRY
242 leaq SS+16(%rsp), %rdi
243 movq $0, %rdx /* No framepointers needed */
244#else
142 leaq 8(%rbp), %rdi 245 leaq 8(%rbp), %rdi
143 movq 0x38(%rsp), %rsi
144 movq (%rbp), %rdx 246 movq (%rbp), %rdx
247#endif
248 movq RIP(%rsp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rsi 249 subq $MCOUNT_INSN_SIZE, %rsi
146 250
147 call prepare_ftrace_return 251 call prepare_ftrace_return
@@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64)
342 .macro SAVE_ARGS_IRQ 446 .macro SAVE_ARGS_IRQ
343 cld 447 cld
344 /* start from rbp in pt_regs and jump over */ 448 /* start from rbp in pt_regs and jump over */
345 movq_cfi rdi, RDI-RBP 449 movq_cfi rdi, (RDI-RBP)
346 movq_cfi rsi, RSI-RBP 450 movq_cfi rsi, (RSI-RBP)
347 movq_cfi rdx, RDX-RBP 451 movq_cfi rdx, (RDX-RBP)
348 movq_cfi rcx, RCX-RBP 452 movq_cfi rcx, (RCX-RBP)
349 movq_cfi rax, RAX-RBP 453 movq_cfi rax, (RAX-RBP)
350 movq_cfi r8, R8-RBP 454 movq_cfi r8, (R8-RBP)
351 movq_cfi r9, R9-RBP 455 movq_cfi r9, (R9-RBP)
352 movq_cfi r10, R10-RBP 456 movq_cfi r10, (R10-RBP)
353 movq_cfi r11, R11-RBP 457 movq_cfi r11, (R11-RBP)
354 458
355 /* Save rbp so that we can unwind from get_irq_regs() */ 459 /* Save rbp so that we can unwind from get_irq_regs() */
356 movq_cfi rbp, 0 460 movq_cfi rbp, 0
@@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64)
384 .endm 488 .endm
385 489
386ENTRY(save_rest) 490ENTRY(save_rest)
387 PARTIAL_FRAME 1 REST_SKIP+8 491 PARTIAL_FRAME 1 (REST_SKIP+8)
388 movq 5*8+16(%rsp), %r11 /* save return address */ 492 movq 5*8+16(%rsp), %r11 /* save return address */
389 movq_cfi rbx, RBX+16 493 movq_cfi rbx, RBX+16
390 movq_cfi rbp, RBP+16 494 movq_cfi rbp, RBP+16
@@ -440,7 +544,7 @@ ENTRY(ret_from_fork)
440 544
441 LOCK ; btr $TIF_FORK,TI_flags(%r8) 545 LOCK ; btr $TIF_FORK,TI_flags(%r8)
442 546
443 pushq_cfi kernel_eflags(%rip) 547 pushq_cfi $0x0002
444 popfq_cfi # reset kernel eflags 548 popfq_cfi # reset kernel eflags
445 549
446 call schedule_tail # rdi: 'prev' task parameter 550 call schedule_tail # rdi: 'prev' task parameter
@@ -465,7 +569,8 @@ END(ret_from_fork)
465 * System call entry. Up to 6 arguments in registers are supported. 569 * System call entry. Up to 6 arguments in registers are supported.
466 * 570 *
467 * SYSCALL does not save anything on the stack and does not change the 571 * SYSCALL does not save anything on the stack and does not change the
468 * stack pointer. 572 * stack pointer. However, it does mask the flags register for us, so
573 * CLD and CLAC are not needed.
469 */ 574 */
470 575
471/* 576/*
@@ -565,7 +670,7 @@ sysret_careful:
565 TRACE_IRQS_ON 670 TRACE_IRQS_ON
566 ENABLE_INTERRUPTS(CLBR_NONE) 671 ENABLE_INTERRUPTS(CLBR_NONE)
567 pushq_cfi %rdi 672 pushq_cfi %rdi
568 call schedule 673 SCHEDULE_USER
569 popq_cfi %rdi 674 popq_cfi %rdi
570 jmp sysret_check 675 jmp sysret_check
571 676
@@ -678,7 +783,7 @@ int_careful:
678 TRACE_IRQS_ON 783 TRACE_IRQS_ON
679 ENABLE_INTERRUPTS(CLBR_NONE) 784 ENABLE_INTERRUPTS(CLBR_NONE)
680 pushq_cfi %rdi 785 pushq_cfi %rdi
681 call schedule 786 SCHEDULE_USER
682 popq_cfi %rdi 787 popq_cfi %rdi
683 DISABLE_INTERRUPTS(CLBR_NONE) 788 DISABLE_INTERRUPTS(CLBR_NONE)
684 TRACE_IRQS_OFF 789 TRACE_IRQS_OFF
@@ -884,6 +989,7 @@ END(interrupt)
884 */ 989 */
885 .p2align CONFIG_X86_L1_CACHE_SHIFT 990 .p2align CONFIG_X86_L1_CACHE_SHIFT
886common_interrupt: 991common_interrupt:
992 ASM_CLAC
887 XCPT_FRAME 993 XCPT_FRAME
888 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 994 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
889 interrupt do_IRQ 995 interrupt do_IRQ
@@ -974,7 +1080,7 @@ retint_careful:
974 TRACE_IRQS_ON 1080 TRACE_IRQS_ON
975 ENABLE_INTERRUPTS(CLBR_NONE) 1081 ENABLE_INTERRUPTS(CLBR_NONE)
976 pushq_cfi %rdi 1082 pushq_cfi %rdi
977 call schedule 1083 SCHEDULE_USER
978 popq_cfi %rdi 1084 popq_cfi %rdi
979 GET_THREAD_INFO(%rcx) 1085 GET_THREAD_INFO(%rcx)
980 DISABLE_INTERRUPTS(CLBR_NONE) 1086 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1023,6 +1129,7 @@ END(common_interrupt)
1023 */ 1129 */
1024.macro apicinterrupt num sym do_sym 1130.macro apicinterrupt num sym do_sym
1025ENTRY(\sym) 1131ENTRY(\sym)
1132 ASM_CLAC
1026 INTR_FRAME 1133 INTR_FRAME
1027 pushq_cfi $~(\num) 1134 pushq_cfi $~(\num)
1028.Lcommon_\sym: 1135.Lcommon_\sym:
@@ -1077,6 +1184,7 @@ apicinterrupt IRQ_WORK_VECTOR \
1077 */ 1184 */
1078.macro zeroentry sym do_sym 1185.macro zeroentry sym do_sym
1079ENTRY(\sym) 1186ENTRY(\sym)
1187 ASM_CLAC
1080 INTR_FRAME 1188 INTR_FRAME
1081 PARAVIRT_ADJUST_EXCEPTION_FRAME 1189 PARAVIRT_ADJUST_EXCEPTION_FRAME
1082 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1190 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1094,6 +1202,7 @@ END(\sym)
1094 1202
1095.macro paranoidzeroentry sym do_sym 1203.macro paranoidzeroentry sym do_sym
1096ENTRY(\sym) 1204ENTRY(\sym)
1205 ASM_CLAC
1097 INTR_FRAME 1206 INTR_FRAME
1098 PARAVIRT_ADJUST_EXCEPTION_FRAME 1207 PARAVIRT_ADJUST_EXCEPTION_FRAME
1099 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1208 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1112,6 +1221,7 @@ END(\sym)
1112#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) 1221#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1113.macro paranoidzeroentry_ist sym do_sym ist 1222.macro paranoidzeroentry_ist sym do_sym ist
1114ENTRY(\sym) 1223ENTRY(\sym)
1224 ASM_CLAC
1115 INTR_FRAME 1225 INTR_FRAME
1116 PARAVIRT_ADJUST_EXCEPTION_FRAME 1226 PARAVIRT_ADJUST_EXCEPTION_FRAME
1117 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1227 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1131,6 +1241,7 @@ END(\sym)
1131 1241
1132.macro errorentry sym do_sym 1242.macro errorentry sym do_sym
1133ENTRY(\sym) 1243ENTRY(\sym)
1244 ASM_CLAC
1134 XCPT_FRAME 1245 XCPT_FRAME
1135 PARAVIRT_ADJUST_EXCEPTION_FRAME 1246 PARAVIRT_ADJUST_EXCEPTION_FRAME
1136 subq $ORIG_RAX-R15, %rsp 1247 subq $ORIG_RAX-R15, %rsp
@@ -1149,6 +1260,7 @@ END(\sym)
1149 /* error code is on the stack already */ 1260 /* error code is on the stack already */
1150.macro paranoiderrorentry sym do_sym 1261.macro paranoiderrorentry sym do_sym
1151ENTRY(\sym) 1262ENTRY(\sym)
1263 ASM_CLAC
1152 XCPT_FRAME 1264 XCPT_FRAME
1153 PARAVIRT_ADJUST_EXCEPTION_FRAME 1265 PARAVIRT_ADJUST_EXCEPTION_FRAME
1154 subq $ORIG_RAX-R15, %rsp 1266 subq $ORIG_RAX-R15, %rsp
@@ -1449,7 +1561,7 @@ paranoid_userspace:
1449paranoid_schedule: 1561paranoid_schedule:
1450 TRACE_IRQS_ON 1562 TRACE_IRQS_ON
1451 ENABLE_INTERRUPTS(CLBR_ANY) 1563 ENABLE_INTERRUPTS(CLBR_ANY)
1452 call schedule 1564 SCHEDULE_USER
1453 DISABLE_INTERRUPTS(CLBR_ANY) 1565 DISABLE_INTERRUPTS(CLBR_ANY)
1454 TRACE_IRQS_OFF 1566 TRACE_IRQS_OFF
1455 jmp paranoid_userspace 1567 jmp paranoid_userspace
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c3a7cb4bf6e6..1d414029f1d8 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,6 +206,21 @@ static int
206ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 206ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
207 unsigned const char *new_code); 207 unsigned const char *new_code);
208 208
209/*
210 * Should never be called:
211 * As it is only called by __ftrace_replace_code() which is called by
212 * ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
213 * which is called to turn mcount into nops or nops into function calls
214 * but not to convert a function from not using regs to one that uses
215 * regs, which ftrace_modify_call() is for.
216 */
217int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
218 unsigned long addr)
219{
220 WARN_ON(1);
221 return -EINVAL;
222}
223
209int ftrace_update_ftrace_func(ftrace_func_t func) 224int ftrace_update_ftrace_func(ftrace_func_t func)
210{ 225{
211 unsigned long ip = (unsigned long)(&ftrace_call); 226 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
220 235
221 ret = ftrace_modify_code(ip, old, new); 236 ret = ftrace_modify_code(ip, old, new);
222 237
238 /* Also update the regs callback function */
239 if (!ret) {
240 ip = (unsigned long)(&ftrace_regs_call);
241 memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
242 new = ftrace_call_replace(ip, (unsigned long)func);
243 ret = ftrace_modify_code(ip, old, new);
244 }
245
223 atomic_dec(&modifying_ftrace_code); 246 atomic_dec(&modifying_ftrace_code);
224 247
225 return ret; 248 return ret;
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
299 return add_break(rec->ip, old); 322 return add_break(rec->ip, old);
300} 323}
301 324
325/*
326 * If the record has the FTRACE_FL_REGS set, that means that it
327 * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
328 * is not set, then it wants to convert to the normal callback.
329 */
330static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
331{
332 if (rec->flags & FTRACE_FL_REGS)
333 return (unsigned long)FTRACE_REGS_ADDR;
334 else
335 return (unsigned long)FTRACE_ADDR;
336}
337
338/*
339 * The FTRACE_FL_REGS_EN is set when the record already points to
340 * a function that saves all the regs. Basically the '_EN' version
341 * represents the current state of the function.
342 */
343static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
344{
345 if (rec->flags & FTRACE_FL_REGS_EN)
346 return (unsigned long)FTRACE_REGS_ADDR;
347 else
348 return (unsigned long)FTRACE_ADDR;
349}
350
302static int add_breakpoints(struct dyn_ftrace *rec, int enable) 351static int add_breakpoints(struct dyn_ftrace *rec, int enable)
303{ 352{
304 unsigned long ftrace_addr; 353 unsigned long ftrace_addr;
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
306 355
307 ret = ftrace_test_record(rec, enable); 356 ret = ftrace_test_record(rec, enable);
308 357
309 ftrace_addr = (unsigned long)FTRACE_ADDR; 358 ftrace_addr = get_ftrace_addr(rec);
310 359
311 switch (ret) { 360 switch (ret) {
312 case FTRACE_UPDATE_IGNORE: 361 case FTRACE_UPDATE_IGNORE:
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
316 /* converting nop to call */ 365 /* converting nop to call */
317 return add_brk_on_nop(rec); 366 return add_brk_on_nop(rec);
318 367
368 case FTRACE_UPDATE_MODIFY_CALL_REGS:
369 case FTRACE_UPDATE_MODIFY_CALL:
370 ftrace_addr = get_ftrace_old_addr(rec);
371 /* fall through */
319 case FTRACE_UPDATE_MAKE_NOP: 372 case FTRACE_UPDATE_MAKE_NOP:
320 /* converting a call to a nop */ 373 /* converting a call to a nop */
321 return add_brk_on_call(rec, ftrace_addr); 374 return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
360 * If not, don't touch the breakpoint, we may just create 413 * If not, don't touch the breakpoint, we may just create
361 * a disaster. 414 * a disaster.
362 */ 415 */
363 ftrace_addr = (unsigned long)FTRACE_ADDR; 416 ftrace_addr = get_ftrace_addr(rec);
417 nop = ftrace_call_replace(ip, ftrace_addr);
418
419 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
420 goto update;
421
422 /* Check both ftrace_addr and ftrace_old_addr */
423 ftrace_addr = get_ftrace_old_addr(rec);
364 nop = ftrace_call_replace(ip, ftrace_addr); 424 nop = ftrace_call_replace(ip, ftrace_addr);
365 425
366 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) 426 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
367 return -EINVAL; 427 return -EINVAL;
368 } 428 }
369 429
430 update:
370 return probe_kernel_write((void *)ip, &nop[0], 1); 431 return probe_kernel_write((void *)ip, &nop[0], 1);
371} 432}
372 433
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
405 466
406 ret = ftrace_test_record(rec, enable); 467 ret = ftrace_test_record(rec, enable);
407 468
408 ftrace_addr = (unsigned long)FTRACE_ADDR; 469 ftrace_addr = get_ftrace_addr(rec);
409 470
410 switch (ret) { 471 switch (ret) {
411 case FTRACE_UPDATE_IGNORE: 472 case FTRACE_UPDATE_IGNORE:
412 return 0; 473 return 0;
413 474
475 case FTRACE_UPDATE_MODIFY_CALL_REGS:
476 case FTRACE_UPDATE_MODIFY_CALL:
414 case FTRACE_UPDATE_MAKE_CALL: 477 case FTRACE_UPDATE_MAKE_CALL:
415 /* converting nop to call */ 478 /* converting nop to call */
416 return add_update_call(rec, ftrace_addr); 479 return add_update_call(rec, ftrace_addr);
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
455 518
456 ret = ftrace_update_record(rec, enable); 519 ret = ftrace_update_record(rec, enable);
457 520
458 ftrace_addr = (unsigned long)FTRACE_ADDR; 521 ftrace_addr = get_ftrace_addr(rec);
459 522
460 switch (ret) { 523 switch (ret) {
461 case FTRACE_UPDATE_IGNORE: 524 case FTRACE_UPDATE_IGNORE:
462 return 0; 525 return 0;
463 526
527 case FTRACE_UPDATE_MODIFY_CALL_REGS:
528 case FTRACE_UPDATE_MODIFY_CALL:
464 case FTRACE_UPDATE_MAKE_CALL: 529 case FTRACE_UPDATE_MAKE_CALL:
465 /* converting nop to call */ 530 /* converting nop to call */
466 return finish_update_call(rec, ftrace_addr); 531 return finish_update_call(rec, ftrace_addr);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d42ab17b7397..957a47aec64e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -287,27 +287,28 @@ ENTRY(startup_32_smp)
287 leal -__PAGE_OFFSET(%ecx),%esp 287 leal -__PAGE_OFFSET(%ecx),%esp
288 288
289default_entry: 289default_entry:
290
291/* 290/*
292 * New page tables may be in 4Mbyte page mode and may 291 * New page tables may be in 4Mbyte page mode and may
293 * be using the global pages. 292 * be using the global pages.
294 * 293 *
295 * NOTE! If we are on a 486 we may have no cr4 at all! 294 * NOTE! If we are on a 486 we may have no cr4 at all!
296 * So we do not try to touch it unless we really have 295 * Specifically, cr4 exists if and only if CPUID exists,
297 * some bits in it to set. This won't work if the BSP 296 * which in turn exists if and only if EFLAGS.ID exists.
298 * implements cr4 but this AP does not -- very unlikely
299 * but be warned! The same applies to the pse feature
300 * if not equally supported. --macro
301 *
302 * NOTE! We have to correct for the fact that we're
303 * not yet offset PAGE_OFFSET..
304 */ 297 */
305#define cr4_bits pa(mmu_cr4_features) 298 movl $X86_EFLAGS_ID,%ecx
306 movl cr4_bits,%edx 299 pushl %ecx
307 andl %edx,%edx 300 popfl
308 jz 6f 301 pushfl
309 movl %cr4,%eax # Turn on paging options (PSE,PAE,..) 302 popl %eax
310 orl %edx,%eax 303 pushl $0
304 popfl
305 pushfl
306 popl %edx
307 xorl %edx,%eax
308 testl %ecx,%eax
309 jz 6f # No ID flag = no CPUID = no CR4
310
311 movl pa(mmu_cr4_features),%eax
311 movl %eax,%cr4 312 movl %eax,%cr4
312 313
313 testb $X86_CR4_PAE, %al # check if PAE is enabled 314 testb $X86_CR4_PAE, %al # check if PAE is enabled
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f250431fb505..675a05012449 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -19,24 +19,17 @@
19#include <asm/fpu-internal.h> 19#include <asm/fpu-internal.h>
20#include <asm/user.h> 20#include <asm/user.h>
21 21
22#ifdef CONFIG_X86_64
23# include <asm/sigcontext32.h>
24# include <asm/user32.h>
25#else
26# define save_i387_xstate_ia32 save_i387_xstate
27# define restore_i387_xstate_ia32 restore_i387_xstate
28# define _fpstate_ia32 _fpstate
29# define _xstate_ia32 _xstate
30# define sig_xstate_ia32_size sig_xstate_size
31# define fx_sw_reserved_ia32 fx_sw_reserved
32# define user_i387_ia32_struct user_i387_struct
33# define user32_fxsr_struct user_fxsr_struct
34#endif
35
36/* 22/*
37 * Were we in an interrupt that interrupted kernel mode? 23 * Were we in an interrupt that interrupted kernel mode?
38 * 24 *
39 * We can do a kernel_fpu_begin/end() pair *ONLY* if that 25 * For now, with eagerfpu we will return interrupted kernel FPU
26 * state as not-idle. TBD: Ideally we can change the return value
27 * to something like __thread_has_fpu(current). But we need to
28 * be careful of doing __thread_clear_has_fpu() before saving
29 * the FPU etc for supporting nested uses etc. For now, take
30 * the simple route!
31 *
32 * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
40 * pair does nothing at all: the thread must not have fpu (so 33 * pair does nothing at all: the thread must not have fpu (so
41 * that we don't try to save the FPU state), and TS must 34 * that we don't try to save the FPU state), and TS must
42 * be set (so that the clts/stts pair does nothing that is 35 * be set (so that the clts/stts pair does nothing that is
@@ -44,6 +37,9 @@
44 */ 37 */
45static inline bool interrupted_kernel_fpu_idle(void) 38static inline bool interrupted_kernel_fpu_idle(void)
46{ 39{
40 if (use_eager_fpu())
41 return 0;
42
47 return !__thread_has_fpu(current) && 43 return !__thread_has_fpu(current) &&
48 (read_cr0() & X86_CR0_TS); 44 (read_cr0() & X86_CR0_TS);
49} 45}
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
77} 73}
78EXPORT_SYMBOL(irq_fpu_usable); 74EXPORT_SYMBOL(irq_fpu_usable);
79 75
80void kernel_fpu_begin(void) 76void __kernel_fpu_begin(void)
81{ 77{
82 struct task_struct *me = current; 78 struct task_struct *me = current;
83 79
84 WARN_ON_ONCE(!irq_fpu_usable());
85 preempt_disable();
86 if (__thread_has_fpu(me)) { 80 if (__thread_has_fpu(me)) {
87 __save_init_fpu(me); 81 __save_init_fpu(me);
88 __thread_clear_has_fpu(me); 82 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */ 83 /* We do 'stts()' in __kernel_fpu_end() */
90 } else { 84 } else if (!use_eager_fpu()) {
91 this_cpu_write(fpu_owner_task, NULL); 85 this_cpu_write(fpu_owner_task, NULL);
92 clts(); 86 clts();
93 } 87 }
94} 88}
95EXPORT_SYMBOL(kernel_fpu_begin); 89EXPORT_SYMBOL(__kernel_fpu_begin);
96 90
97void kernel_fpu_end(void) 91void __kernel_fpu_end(void)
98{ 92{
99 stts(); 93 if (use_eager_fpu())
100 preempt_enable(); 94 math_state_restore();
95 else
96 stts();
101} 97}
102EXPORT_SYMBOL(kernel_fpu_end); 98EXPORT_SYMBOL(__kernel_fpu_end);
103 99
104void unlazy_fpu(struct task_struct *tsk) 100void unlazy_fpu(struct task_struct *tsk)
105{ 101{
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
113} 109}
114EXPORT_SYMBOL(unlazy_fpu); 110EXPORT_SYMBOL(unlazy_fpu);
115 111
116#ifdef CONFIG_MATH_EMULATION 112unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
117# define HAVE_HWFP (boot_cpu_data.hard_math)
118#else
119# define HAVE_HWFP 1
120#endif
121
122static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
123unsigned int xstate_size; 113unsigned int xstate_size;
124EXPORT_SYMBOL_GPL(xstate_size); 114EXPORT_SYMBOL_GPL(xstate_size);
125unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
126static struct i387_fxsave_struct fx_scratch __cpuinitdata; 115static struct i387_fxsave_struct fx_scratch __cpuinitdata;
127 116
128static void __cpuinit mxcsr_feature_mask_init(void) 117static void __cpuinit mxcsr_feature_mask_init(void)
129{ 118{
130 unsigned long mask = 0; 119 unsigned long mask = 0;
131 120
132 clts();
133 if (cpu_has_fxsr) { 121 if (cpu_has_fxsr) {
134 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); 122 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
135 asm volatile("fxsave %0" : : "m" (fx_scratch)); 123 asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
138 mask = 0x0000ffbf; 126 mask = 0x0000ffbf;
139 } 127 }
140 mxcsr_feature_mask &= mask; 128 mxcsr_feature_mask &= mask;
141 stts();
142} 129}
143 130
144static void __cpuinit init_thread_xstate(void) 131static void __cpuinit init_thread_xstate(void)
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
192 init_thread_xstate(); 179 init_thread_xstate();
193 180
194 mxcsr_feature_mask_init(); 181 mxcsr_feature_mask_init();
195 /* clean state in init */ 182 xsave_init();
196 current_thread_info()->status = 0; 183 eager_fpu_init();
197 clear_used_math();
198} 184}
199 185
200void fpu_finit(struct fpu *fpu) 186void fpu_finit(struct fpu *fpu)
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
205 } 191 }
206 192
207 if (cpu_has_fxsr) { 193 if (cpu_has_fxsr) {
208 struct i387_fxsave_struct *fx = &fpu->state->fxsave; 194 fx_finit(&fpu->state->fxsave);
209
210 memset(fx, 0, xstate_size);
211 fx->cwd = 0x37f;
212 if (cpu_has_xmm)
213 fx->mxcsr = MXCSR_DEFAULT;
214 } else { 195 } else {
215 struct i387_fsave_struct *fp = &fpu->state->fsave; 196 struct i387_fsave_struct *fp = &fpu->state->fsave;
216 memset(fp, 0, xstate_size); 197 memset(fp, 0, xstate_size);
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
454 * FXSR floating point environment conversions. 435 * FXSR floating point environment conversions.
455 */ 436 */
456 437
457static void 438void
458convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) 439convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
459{ 440{
460 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; 441 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
491 memcpy(&to[i], &from[i], sizeof(to[0])); 472 memcpy(&to[i], &from[i], sizeof(to[0]));
492} 473}
493 474
494static void convert_to_fxsr(struct task_struct *tsk, 475void convert_to_fxsr(struct task_struct *tsk,
495 const struct user_i387_ia32_struct *env) 476 const struct user_i387_ia32_struct *env)
496 477
497{ 478{
498 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; 479 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
589} 570}
590 571
591/* 572/*
592 * Signal frame handlers.
593 */
594
595static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
596{
597 struct task_struct *tsk = current;
598 struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
599
600 fp->status = fp->swd;
601 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
602 return -1;
603 return 1;
604}
605
606static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
607{
608 struct task_struct *tsk = current;
609 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
610 struct user_i387_ia32_struct env;
611 int err = 0;
612
613 convert_from_fxsr(&env, tsk);
614 if (__copy_to_user(buf, &env, sizeof(env)))
615 return -1;
616
617 err |= __put_user(fx->swd, &buf->status);
618 err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
619 if (err)
620 return -1;
621
622 if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
623 return -1;
624 return 1;
625}
626
627static int save_i387_xsave(void __user *buf)
628{
629 struct task_struct *tsk = current;
630 struct _fpstate_ia32 __user *fx = buf;
631 int err = 0;
632
633
634 sanitize_i387_state(tsk);
635
636 /*
637 * For legacy compatible, we always set FP/SSE bits in the bit
638 * vector while saving the state to the user context.
639 * This will enable us capturing any changes(during sigreturn) to
640 * the FP/SSE bits by the legacy applications which don't touch
641 * xstate_bv in the xsave header.
642 *
643 * xsave aware applications can change the xstate_bv in the xsave
644 * header as well as change any contents in the memory layout.
645 * xrestore as part of sigreturn will capture all the changes.
646 */
647 tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
648
649 if (save_i387_fxsave(fx) < 0)
650 return -1;
651
652 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
653 sizeof(struct _fpx_sw_bytes));
654 err |= __put_user(FP_XSTATE_MAGIC2,
655 (__u32 __user *) (buf + sig_xstate_ia32_size
656 - FP_XSTATE_MAGIC2_SIZE));
657 if (err)
658 return -1;
659
660 return 1;
661}
662
663int save_i387_xstate_ia32(void __user *buf)
664{
665 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
666 struct task_struct *tsk = current;
667
668 if (!used_math())
669 return 0;
670
671 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
672 return -EACCES;
673 /*
674 * This will cause a "finit" to be triggered by the next
675 * attempted FPU operation by the 'current' process.
676 */
677 clear_used_math();
678
679 if (!HAVE_HWFP) {
680 return fpregs_soft_get(current, NULL,
681 0, sizeof(struct user_i387_ia32_struct),
682 NULL, fp) ? -1 : 1;
683 }
684
685 unlazy_fpu(tsk);
686
687 if (cpu_has_xsave)
688 return save_i387_xsave(fp);
689 if (cpu_has_fxsr)
690 return save_i387_fxsave(fp);
691 else
692 return save_i387_fsave(fp);
693}
694
695static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
696{
697 struct task_struct *tsk = current;
698
699 return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
700 sizeof(struct i387_fsave_struct));
701}
702
703static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
704 unsigned int size)
705{
706 struct task_struct *tsk = current;
707 struct user_i387_ia32_struct env;
708 int err;
709
710 err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
711 size);
712 /* mxcsr reserved bits must be masked to zero for security reasons */
713 tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
714 if (err || __copy_from_user(&env, buf, sizeof(env)))
715 return 1;
716 convert_to_fxsr(tsk, &env);
717
718 return 0;
719}
720
721static int restore_i387_xsave(void __user *buf)
722{
723 struct _fpx_sw_bytes fx_sw_user;
724 struct _fpstate_ia32 __user *fx_user =
725 ((struct _fpstate_ia32 __user *) buf);
726 struct i387_fxsave_struct __user *fx =
727 (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
728 struct xsave_hdr_struct *xsave_hdr =
729 &current->thread.fpu.state->xsave.xsave_hdr;
730 u64 mask;
731 int err;
732
733 if (check_for_xstate(fx, buf, &fx_sw_user))
734 goto fx_only;
735
736 mask = fx_sw_user.xstate_bv;
737
738 err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
739
740 xsave_hdr->xstate_bv &= pcntxt_mask;
741 /*
742 * These bits must be zero.
743 */
744 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
745
746 /*
747 * Init the state that is not present in the memory layout
748 * and enabled by the OS.
749 */
750 mask = ~(pcntxt_mask & ~mask);
751 xsave_hdr->xstate_bv &= mask;
752
753 return err;
754fx_only:
755 /*
756 * Couldn't find the extended state information in the memory
757 * layout. Restore the FP/SSE and init the other extended state
758 * enabled by the OS.
759 */
760 xsave_hdr->xstate_bv = XSTATE_FPSSE;
761 return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
762}
763
764int restore_i387_xstate_ia32(void __user *buf)
765{
766 int err;
767 struct task_struct *tsk = current;
768 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
769
770 if (HAVE_HWFP)
771 clear_fpu(tsk);
772
773 if (!buf) {
774 if (used_math()) {
775 clear_fpu(tsk);
776 clear_used_math();
777 }
778
779 return 0;
780 } else
781 if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
782 return -EACCES;
783
784 if (!used_math()) {
785 err = init_fpu(tsk);
786 if (err)
787 return err;
788 }
789
790 if (HAVE_HWFP) {
791 if (cpu_has_xsave)
792 err = restore_i387_xsave(buf);
793 else if (cpu_has_fxsr)
794 err = restore_i387_fxsave(fp, sizeof(struct
795 i387_fxsave_struct));
796 else
797 err = restore_i387_fsave(fp);
798 } else {
799 err = fpregs_soft_set(current, NULL,
800 0, sizeof(struct user_i387_ia32_struct),
801 NULL, fp) != 0;
802 }
803 set_used_math();
804
805 return err;
806}
807
808/*
809 * FPU state for core dumps. 573 * FPU state for core dumps.
810 * This is only used for a.out dumps now. 574 * This is only used for a.out dumps now.
811 * It is declared generically using elf_fpregset_t (which is 575 * It is declared generically using elf_fpregset_t (which is
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 36d1853e91af..9a5c460404dc 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
263 * out of. 263 * out of.
264 */ 264 */
265 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 265 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
266 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ 266 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
267} 267}
268 268
269static struct syscore_ops i8259_syscore_ops = { 269static struct syscore_ops i8259_syscore_ops = {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d44f7829968e..e4595f105910 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
92 seq_printf(p, " Rescheduling interrupts\n"); 92 seq_printf(p, " Rescheduling interrupts\n");
93 seq_printf(p, "%*s: ", prec, "CAL"); 93 seq_printf(p, "%*s: ", prec, "CAL");
94 for_each_online_cpu(j) 94 for_each_online_cpu(j)
95 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 95 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
96 irq_stats(j)->irq_tlb_count);
96 seq_printf(p, " Function call interrupts\n"); 97 seq_printf(p, " Function call interrupts\n");
97 seq_printf(p, "%*s: ", prec, "TLB"); 98 seq_printf(p, "%*s: ", prec, "TLB");
98 for_each_online_cpu(j) 99 for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
147#ifdef CONFIG_SMP 148#ifdef CONFIG_SMP
148 sum += irq_stats(cpu)->irq_resched_count; 149 sum += irq_stats(cpu)->irq_resched_count;
149 sum += irq_stats(cpu)->irq_call_count; 150 sum += irq_stats(cpu)->irq_call_count;
150 sum += irq_stats(cpu)->irq_tlb_count;
151#endif 151#endif
152#ifdef CONFIG_X86_THERMAL_VECTOR 152#ifdef CONFIG_X86_THERMAL_VECTOR
153 sum += irq_stats(cpu)->irq_thermal_count; 153 sum += irq_stats(cpu)->irq_thermal_count;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751efb7b1..57916c0d3cf6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
541 return 1; 541 return 1;
542} 542}
543 543
544#ifdef KPROBES_CAN_USE_FTRACE
545static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
546 struct kprobe_ctlblk *kcb)
547{
548 /*
549 * Emulate singlestep (and also recover regs->ip)
550 * as if there is a 5byte nop
551 */
552 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
553 if (unlikely(p->post_handler)) {
554 kcb->kprobe_status = KPROBE_HIT_SSDONE;
555 p->post_handler(p, regs, 0);
556 }
557 __this_cpu_write(current_kprobe, NULL);
558}
559#endif
560
544/* 561/*
545 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 562 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
546 * remain disabled throughout this function. 563 * remain disabled throughout this function.
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
599 } else if (kprobe_running()) { 616 } else if (kprobe_running()) {
600 p = __this_cpu_read(current_kprobe); 617 p = __this_cpu_read(current_kprobe);
601 if (p->break_handler && p->break_handler(p, regs)) { 618 if (p->break_handler && p->break_handler(p, regs)) {
619#ifdef KPROBES_CAN_USE_FTRACE
620 if (kprobe_ftrace(p)) {
621 skip_singlestep(p, regs, kcb);
622 return 1;
623 }
624#endif
602 setup_singlestep(p, regs, kcb, 0); 625 setup_singlestep(p, regs, kcb, 0);
603 return 1; 626 return 1;
604 } 627 }
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1052 return 0; 1075 return 0;
1053} 1076}
1054 1077
1078#ifdef KPROBES_CAN_USE_FTRACE
1079/* Ftrace callback handler for kprobes */
1080void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
1081 struct ftrace_ops *ops, struct pt_regs *regs)
1082{
1083 struct kprobe *p;
1084 struct kprobe_ctlblk *kcb;
1085 unsigned long flags;
1086
1087 /* Disable irq for emulating a breakpoint and avoiding preempt */
1088 local_irq_save(flags);
1089
1090 p = get_kprobe((kprobe_opcode_t *)ip);
1091 if (unlikely(!p) || kprobe_disabled(p))
1092 goto end;
1093
1094 kcb = get_kprobe_ctlblk();
1095 if (kprobe_running()) {
1096 kprobes_inc_nmissed_count(p);
1097 } else {
1098 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
1099 regs->ip = ip + sizeof(kprobe_opcode_t);
1100
1101 __this_cpu_write(current_kprobe, p);
1102 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1103 if (!p->pre_handler || !p->pre_handler(p, regs))
1104 skip_singlestep(p, regs, kcb);
1105 /*
1106 * If pre_handler returns !0, it sets regs->ip and
1107 * resets current kprobe.
1108 */
1109 }
1110end:
1111 local_irq_restore(flags);
1112}
1113
1114int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
1115{
1116 p->ainsn.insn = NULL;
1117 p->ainsn.boostable = -1;
1118 return 0;
1119}
1120#endif
1121
1055int __init arch_init_kprobes(void) 1122int __init arch_init_kprobes(void)
1056{ 1123{
1057 return arch_init_optprobes(); 1124 return arch_init_optprobes();
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 82746f942cd8..7720ff5a9ee2 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -75,20 +75,113 @@ struct microcode_amd {
75 75
76static struct equiv_cpu_entry *equiv_cpu_table; 76static struct equiv_cpu_entry *equiv_cpu_table;
77 77
78/* page-sized ucode patch buffer */ 78struct ucode_patch {
79void *patch; 79 struct list_head plist;
80 void *data;
81 u32 patch_id;
82 u16 equiv_cpu;
83};
84
85static LIST_HEAD(pcache);
86
87static u16 find_equiv_id(unsigned int cpu)
88{
89 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
90 int i = 0;
91
92 if (!equiv_cpu_table)
93 return 0;
94
95 while (equiv_cpu_table[i].installed_cpu != 0) {
96 if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
97 return equiv_cpu_table[i].equiv_cpu;
98
99 i++;
100 }
101 return 0;
102}
103
104static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
105{
106 int i = 0;
107
108 BUG_ON(!equiv_cpu_table);
109
110 while (equiv_cpu_table[i].equiv_cpu != 0) {
111 if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
112 return equiv_cpu_table[i].installed_cpu;
113 i++;
114 }
115 return 0;
116}
117
118/*
119 * a small, trivial cache of per-family ucode patches
120 */
121static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
122{
123 struct ucode_patch *p;
124
125 list_for_each_entry(p, &pcache, plist)
126 if (p->equiv_cpu == equiv_cpu)
127 return p;
128 return NULL;
129}
130
131static void update_cache(struct ucode_patch *new_patch)
132{
133 struct ucode_patch *p;
134
135 list_for_each_entry(p, &pcache, plist) {
136 if (p->equiv_cpu == new_patch->equiv_cpu) {
137 if (p->patch_id >= new_patch->patch_id)
138 /* we already have the latest patch */
139 return;
140
141 list_replace(&p->plist, &new_patch->plist);
142 kfree(p->data);
143 kfree(p);
144 return;
145 }
146 }
147 /* no patch found, add it */
148 list_add_tail(&new_patch->plist, &pcache);
149}
150
151static void free_cache(void)
152{
153 struct ucode_patch *p, *tmp;
154
155 list_for_each_entry_safe(p, tmp, &pcache, plist) {
156 __list_del(p->plist.prev, p->plist.next);
157 kfree(p->data);
158 kfree(p);
159 }
160}
161
162static struct ucode_patch *find_patch(unsigned int cpu)
163{
164 u16 equiv_id;
165
166 equiv_id = find_equiv_id(cpu);
167 if (!equiv_id)
168 return NULL;
169
170 return cache_find_patch(equiv_id);
171}
80 172
81static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 173static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
82{ 174{
83 struct cpuinfo_x86 *c = &cpu_data(cpu); 175 struct cpuinfo_x86 *c = &cpu_data(cpu);
84 176
177 csig->sig = cpuid_eax(0x00000001);
85 csig->rev = c->microcode; 178 csig->rev = c->microcode;
86 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); 179 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
87 180
88 return 0; 181 return 0;
89} 182}
90 183
91static unsigned int verify_ucode_size(int cpu, u32 patch_size, 184static unsigned int verify_patch_size(int cpu, u32 patch_size,
92 unsigned int size) 185 unsigned int size)
93{ 186{
94 struct cpuinfo_x86 *c = &cpu_data(cpu); 187 struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
118 return patch_size; 211 return patch_size;
119} 212}
120 213
121static u16 find_equiv_id(void) 214static int apply_microcode_amd(int cpu)
122{ 215{
123 unsigned int current_cpu_id, i = 0; 216 struct cpuinfo_x86 *c = &cpu_data(cpu);
124 217 struct microcode_amd *mc_amd;
125 BUG_ON(equiv_cpu_table == NULL); 218 struct ucode_cpu_info *uci;
126 219 struct ucode_patch *p;
127 current_cpu_id = cpuid_eax(0x00000001); 220 u32 rev, dummy;
128
129 while (equiv_cpu_table[i].installed_cpu != 0) {
130 if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
131 return equiv_cpu_table[i].equiv_cpu;
132
133 i++;
134 }
135 return 0;
136}
137 221
138/* 222 BUG_ON(raw_smp_processor_id() != cpu);
139 * we signal a good patch is found by returning its size > 0
140 */
141static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
142 unsigned int leftover_size, int rev,
143 unsigned int *current_size)
144{
145 struct microcode_header_amd *mc_hdr;
146 unsigned int actual_size, patch_size;
147 u16 equiv_cpu_id;
148 223
149 /* size of the current patch we're staring at */ 224 uci = ucode_cpu_info + cpu;
150 patch_size = *(u32 *)(ucode_ptr + 4);
151 *current_size = patch_size + SECTION_HDR_SIZE;
152 225
153 equiv_cpu_id = find_equiv_id(); 226 p = find_patch(cpu);
154 if (!equiv_cpu_id) 227 if (!p)
155 return 0; 228 return 0;
156 229
157 /* 230 mc_amd = p->data;
158 * let's look at the patch header itself now 231 uci->mc = p->data;
159 */
160 mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
161 232
162 if (mc_hdr->processor_rev_id != equiv_cpu_id) 233 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
163 return 0;
164 234
165 /* ucode might be chipset specific -- currently we don't support this */ 235 /* need to apply patch? */
166 if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { 236 if (rev >= mc_amd->hdr.patch_id) {
167 pr_err("CPU%d: chipset specific code not yet supported\n", 237 c->microcode = rev;
168 cpu);
169 return 0; 238 return 0;
170 } 239 }
171 240
172 if (mc_hdr->patch_id <= rev)
173 return 0;
174
175 /*
176 * now that the header looks sane, verify its size
177 */
178 actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
179 if (!actual_size)
180 return 0;
181
182 /* clear the patch buffer */
183 memset(patch, 0, PAGE_SIZE);
184
185 /* all looks ok, get the binary patch */
186 get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
187
188 return actual_size;
189}
190
191static int apply_microcode_amd(int cpu)
192{
193 u32 rev, dummy;
194 int cpu_num = raw_smp_processor_id();
195 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
196 struct microcode_amd *mc_amd = uci->mc;
197 struct cpuinfo_x86 *c = &cpu_data(cpu);
198
199 /* We should bind the task to the CPU */
200 BUG_ON(cpu_num != cpu);
201
202 if (mc_amd == NULL)
203 return 0;
204
205 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); 241 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
206 /* get patch id after patching */
207 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
208 242
209 /* check current patch id and patch's id for match */ 243 /* verify patch application was successful */
244 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
210 if (rev != mc_amd->hdr.patch_id) { 245 if (rev != mc_amd->hdr.patch_id) {
211 pr_err("CPU%d: update failed for patch_level=0x%08x\n", 246 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
212 cpu, mc_amd->hdr.patch_id); 247 cpu, mc_amd->hdr.patch_id);
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf)
238 return -ENOMEM; 273 return -ENOMEM;
239 } 274 }
240 275
241 get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); 276 memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
242 277
243 /* add header length */ 278 /* add header length */
244 return size + CONTAINER_HDR_SZ; 279 return size + CONTAINER_HDR_SZ;
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void)
250 equiv_cpu_table = NULL; 285 equiv_cpu_table = NULL;
251} 286}
252 287
253static enum ucode_state 288static void cleanup(void)
254generic_load_microcode(int cpu, const u8 *data, size_t size)
255{ 289{
256 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 290 free_equiv_cpu_table();
257 struct microcode_header_amd *mc_hdr = NULL; 291 free_cache();
258 unsigned int mc_size, leftover, current_size = 0; 292}
293
294/*
295 * We return the current size even if some of the checks failed so that
296 * we can skip over the next patch. If we return a negative value, we
297 * signal a grave error like a memory allocation has failed and the
298 * driver cannot continue functioning normally. In such cases, we tear
299 * down everything we've used up so far and exit.
300 */
301static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
302{
303 struct cpuinfo_x86 *c = &cpu_data(cpu);
304 struct microcode_header_amd *mc_hdr;
305 struct ucode_patch *patch;
306 unsigned int patch_size, crnt_size, ret;
307 u32 proc_fam;
308 u16 proc_id;
309
310 patch_size = *(u32 *)(fw + 4);
311 crnt_size = patch_size + SECTION_HDR_SIZE;
312 mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
313 proc_id = mc_hdr->processor_rev_id;
314
315 proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
316 if (!proc_fam) {
317 pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
318 return crnt_size;
319 }
320
321 /* check if patch is for the current family */
322 proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
323 if (proc_fam != c->x86)
324 return crnt_size;
325
326 if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
327 pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
328 mc_hdr->patch_id);
329 return crnt_size;
330 }
331
332 ret = verify_patch_size(cpu, patch_size, leftover);
333 if (!ret) {
334 pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
335 return crnt_size;
336 }
337
338 patch = kzalloc(sizeof(*patch), GFP_KERNEL);
339 if (!patch) {
340 pr_err("Patch allocation failure.\n");
341 return -EINVAL;
342 }
343
344 patch->data = kzalloc(patch_size, GFP_KERNEL);
345 if (!patch->data) {
346 pr_err("Patch data allocation failure.\n");
347 kfree(patch);
348 return -EINVAL;
349 }
350
351 /* All looks ok, copy patch... */
352 memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
353 INIT_LIST_HEAD(&patch->plist);
354 patch->patch_id = mc_hdr->patch_id;
355 patch->equiv_cpu = proc_id;
356
357 /* ... and add to cache. */
358 update_cache(patch);
359
360 return crnt_size;
361}
362
363static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
364{
365 enum ucode_state ret = UCODE_ERROR;
366 unsigned int leftover;
367 u8 *fw = (u8 *)data;
368 int crnt_size = 0;
259 int offset; 369 int offset;
260 const u8 *ucode_ptr = data;
261 void *new_mc = NULL;
262 unsigned int new_rev = uci->cpu_sig.rev;
263 enum ucode_state state = UCODE_ERROR;
264 370
265 offset = install_equiv_cpu_table(ucode_ptr); 371 offset = install_equiv_cpu_table(data);
266 if (offset < 0) { 372 if (offset < 0) {
267 pr_err("failed to create equivalent cpu table\n"); 373 pr_err("failed to create equivalent cpu table\n");
268 goto out; 374 return ret;
269 } 375 }
270 ucode_ptr += offset; 376 fw += offset;
271 leftover = size - offset; 377 leftover = size - offset;
272 378
273 if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { 379 if (*(u32 *)fw != UCODE_UCODE_TYPE) {
274 pr_err("invalid type field in container file section header\n"); 380 pr_err("invalid type field in container file section header\n");
275 goto free_table; 381 free_equiv_cpu_table();
382 return ret;
276 } 383 }
277 384
278 while (leftover) { 385 while (leftover) {
279 mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, 386 crnt_size = verify_and_add_patch(cpu, fw, leftover);
280 new_rev, &current_size); 387 if (crnt_size < 0)
281 if (mc_size) { 388 return ret;
282 mc_hdr = patch;
283 new_mc = patch;
284 new_rev = mc_hdr->patch_id;
285 goto out_ok;
286 }
287
288 ucode_ptr += current_size;
289 leftover -= current_size;
290 }
291 389
292 if (!new_mc) { 390 fw += crnt_size;
293 state = UCODE_NFOUND; 391 leftover -= crnt_size;
294 goto free_table;
295 } 392 }
296 393
297out_ok: 394 return UCODE_OK;
298 uci->mc = new_mc;
299 state = UCODE_OK;
300 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
301 cpu, uci->cpu_sig.rev, new_rev);
302
303free_table:
304 free_equiv_cpu_table();
305
306out:
307 return state;
308} 395}
309 396
310/* 397/*
@@ -315,7 +402,7 @@ out:
315 * 402 *
316 * This legacy file is always smaller than 2K in size. 403 * This legacy file is always smaller than 2K in size.
317 * 404 *
318 * Starting at family 15h they are in family specific firmware files: 405 * Beginning with family 15h, they are in family-specific firmware files:
319 * 406 *
320 * amd-ucode/microcode_amd_fam15h.bin 407 * amd-ucode/microcode_amd_fam15h.bin
321 * amd-ucode/microcode_amd_fam16h.bin 408 * amd-ucode/microcode_amd_fam16h.bin
@@ -323,12 +410,17 @@ out:
323 * 410 *
324 * These might be larger than 2K. 411 * These might be larger than 2K.
325 */ 412 */
326static enum ucode_state request_microcode_amd(int cpu, struct device *device) 413static enum ucode_state request_microcode_amd(int cpu, struct device *device,
414 bool refresh_fw)
327{ 415{
328 char fw_name[36] = "amd-ucode/microcode_amd.bin"; 416 char fw_name[36] = "amd-ucode/microcode_amd.bin";
329 const struct firmware *fw;
330 enum ucode_state ret = UCODE_NFOUND;
331 struct cpuinfo_x86 *c = &cpu_data(cpu); 417 struct cpuinfo_x86 *c = &cpu_data(cpu);
418 enum ucode_state ret = UCODE_NFOUND;
419 const struct firmware *fw;
420
421 /* reload ucode container only on the boot cpu */
422 if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
423 return UCODE_OK;
332 424
333 if (c->x86 >= 0x15) 425 if (c->x86 >= 0x15)
334 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); 426 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
344 goto fw_release; 436 goto fw_release;
345 } 437 }
346 438
347 ret = generic_load_microcode(cpu, fw->data, fw->size); 439 /* free old equiv table */
440 free_equiv_cpu_table();
441
442 ret = load_microcode_amd(cpu, fw->data, fw->size);
443 if (ret != UCODE_OK)
444 cleanup();
348 445
349fw_release: 446 fw_release:
350 release_firmware(fw); 447 release_firmware(fw);
351 448
352out: 449 out:
353 return ret; 450 return ret;
354} 451}
355 452
@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
383 return NULL; 480 return NULL;
384 } 481 }
385 482
386 patch = (void *)get_zeroed_page(GFP_KERNEL);
387 if (!patch)
388 return NULL;
389
390 return &microcode_amd_ops; 483 return &microcode_amd_ops;
391} 484}
392 485
393void __exit exit_amd_microcode(void) 486void __exit exit_amd_microcode(void)
394{ 487{
395 free_page((unsigned long)patch); 488 cleanup();
396} 489}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9e5bcf1e2376..3a04b224d0c0 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -279,19 +279,18 @@ static struct platform_device *microcode_pdev;
279static int reload_for_cpu(int cpu) 279static int reload_for_cpu(int cpu)
280{ 280{
281 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 281 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
282 enum ucode_state ustate;
282 int err = 0; 283 int err = 0;
283 284
284 if (uci->valid) { 285 if (!uci->valid)
285 enum ucode_state ustate; 286 return err;
286
287 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
288 if (ustate == UCODE_OK)
289 apply_microcode_on_target(cpu);
290 else
291 if (ustate == UCODE_ERROR)
292 err = -EINVAL;
293 }
294 287
288 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
289 if (ustate == UCODE_OK)
290 apply_microcode_on_target(cpu);
291 else
292 if (ustate == UCODE_ERROR)
293 err = -EINVAL;
295 return err; 294 return err;
296} 295}
297 296
@@ -373,18 +372,15 @@ static void microcode_fini_cpu(int cpu)
373 372
374static enum ucode_state microcode_resume_cpu(int cpu) 373static enum ucode_state microcode_resume_cpu(int cpu)
375{ 374{
376 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
377
378 if (!uci->mc)
379 return UCODE_NFOUND;
380
381 pr_debug("CPU%d updated upon resume\n", cpu); 375 pr_debug("CPU%d updated upon resume\n", cpu);
382 apply_microcode_on_target(cpu); 376
377 if (apply_microcode_on_target(cpu))
378 return UCODE_ERROR;
383 379
384 return UCODE_OK; 380 return UCODE_OK;
385} 381}
386 382
387static enum ucode_state microcode_init_cpu(int cpu) 383static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
388{ 384{
389 enum ucode_state ustate; 385 enum ucode_state ustate;
390 386
@@ -395,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
395 if (system_state != SYSTEM_RUNNING) 391 if (system_state != SYSTEM_RUNNING)
396 return UCODE_NFOUND; 392 return UCODE_NFOUND;
397 393
398 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); 394 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
395 refresh_fw);
399 396
400 if (ustate == UCODE_OK) { 397 if (ustate == UCODE_OK) {
401 pr_debug("CPU%d updated upon init\n", cpu); 398 pr_debug("CPU%d updated upon init\n", cpu);
@@ -408,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
408static enum ucode_state microcode_update_cpu(int cpu) 405static enum ucode_state microcode_update_cpu(int cpu)
409{ 406{
410 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 407 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
411 enum ucode_state ustate;
412 408
413 if (uci->valid) 409 if (uci->valid)
414 ustate = microcode_resume_cpu(cpu); 410 return microcode_resume_cpu(cpu);
415 else
416 ustate = microcode_init_cpu(cpu);
417 411
418 return ustate; 412 return microcode_init_cpu(cpu, false);
419} 413}
420 414
421static int mc_device_add(struct device *dev, struct subsys_interface *sif) 415static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -431,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
431 if (err) 425 if (err)
432 return err; 426 return err;
433 427
434 if (microcode_init_cpu(cpu) == UCODE_ERROR) 428 if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
435 return -EINVAL; 429 return -EINVAL;
436 430
437 return err; 431 return err;
@@ -480,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
480 struct device *dev; 474 struct device *dev;
481 475
482 dev = get_cpu_device(cpu); 476 dev = get_cpu_device(cpu);
483 switch (action) { 477
478 switch (action & ~CPU_TASKS_FROZEN) {
484 case CPU_ONLINE: 479 case CPU_ONLINE:
485 case CPU_ONLINE_FROZEN:
486 microcode_update_cpu(cpu); 480 microcode_update_cpu(cpu);
487 case CPU_DOWN_FAILED:
488 case CPU_DOWN_FAILED_FROZEN:
489 pr_debug("CPU%d added\n", cpu); 481 pr_debug("CPU%d added\n", cpu);
482 /*
483 * "break" is missing on purpose here because we want to fall
484 * through in order to create the sysfs group.
485 */
486
487 case CPU_DOWN_FAILED:
490 if (sysfs_create_group(&dev->kobj, &mc_attr_group)) 488 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
491 pr_err("Failed to create group for CPU%d\n", cpu); 489 pr_err("Failed to create group for CPU%d\n", cpu);
492 break; 490 break;
491
493 case CPU_DOWN_PREPARE: 492 case CPU_DOWN_PREPARE:
494 case CPU_DOWN_PREPARE_FROZEN:
495 /* Suspend is in progress, only remove the interface */ 493 /* Suspend is in progress, only remove the interface */
496 sysfs_remove_group(&dev->kobj, &mc_attr_group); 494 sysfs_remove_group(&dev->kobj, &mc_attr_group);
497 pr_debug("CPU%d removed\n", cpu); 495 pr_debug("CPU%d removed\n", cpu);
498 break; 496 break;
499 497
500 /* 498 /*
499 * case CPU_DEAD:
500 *
501 * When a CPU goes offline, don't free up or invalidate the copy of 501 * When a CPU goes offline, don't free up or invalidate the copy of
502 * the microcode in kernel memory, so that we can reuse it when the 502 * the microcode in kernel memory, so that we can reuse it when the
503 * CPU comes back online without unnecessarily requesting the userspace 503 * CPU comes back online without unnecessarily requesting the userspace
504 * for it again. 504 * for it again.
505 */ 505 */
506 case CPU_UP_CANCELED_FROZEN:
507 /* The CPU refused to come up during a system resume */
508 microcode_fini_cpu(cpu);
509 break;
510 } 506 }
507
508 /* The CPU refused to come up during a system resume */
509 if (action == CPU_UP_CANCELED_FROZEN)
510 microcode_fini_cpu(cpu);
511
511 return NOTIFY_OK; 512 return NOTIFY_OK;
512} 513}
513 514
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0327e2b3c408..3544aed39338 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
405 return 0; 405 return 0;
406} 406}
407 407
408static enum ucode_state request_microcode_fw(int cpu, struct device *device) 408static enum ucode_state request_microcode_fw(int cpu, struct device *device,
409 bool refresh_fw)
409{ 410{
410 char name[30]; 411 char name[30];
411 struct cpuinfo_x86 *c = &cpu_data(cpu); 412 struct cpuinfo_x86 *c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index eb113693f043..a7c5661f8496 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -257,12 +257,14 @@ static int __init msr_init(void)
257 goto out_chrdev; 257 goto out_chrdev;
258 } 258 }
259 msr_class->devnode = msr_devnode; 259 msr_class->devnode = msr_devnode;
260 get_online_cpus();
260 for_each_online_cpu(i) { 261 for_each_online_cpu(i) {
261 err = msr_device_create(i); 262 err = msr_device_create(i);
262 if (err != 0) 263 if (err != 0)
263 goto out_class; 264 goto out_class;
264 } 265 }
265 register_hotcpu_notifier(&msr_class_cpu_notifier); 266 register_hotcpu_notifier(&msr_class_cpu_notifier);
267 put_online_cpus();
266 268
267 err = 0; 269 err = 0;
268 goto out; 270 goto out;
@@ -271,6 +273,7 @@ out_class:
271 i = 0; 273 i = 0;
272 for_each_online_cpu(i) 274 for_each_online_cpu(i)
273 msr_device_destroy(i); 275 msr_device_destroy(i);
276 put_online_cpus();
274 class_destroy(msr_class); 277 class_destroy(msr_class);
275out_chrdev: 278out_chrdev:
276 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); 279 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
281static void __exit msr_exit(void) 284static void __exit msr_exit(void)
282{ 285{
283 int cpu = 0; 286 int cpu = 0;
287 get_online_cpus();
284 for_each_online_cpu(cpu) 288 for_each_online_cpu(cpu)
285 msr_device_destroy(cpu); 289 msr_device_destroy(cpu);
286 class_destroy(msr_class); 290 class_destroy(msr_class);
287 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); 291 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
288 unregister_hotcpu_notifier(&msr_class_cpu_notifier); 292 unregister_hotcpu_notifier(&msr_class_cpu_notifier);
293 put_online_cpus();
289} 294}
290 295
291module_init(msr_init); 296module_init(msr_init);
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
new file mode 100644
index 000000000000..e309cc5c276e
--- /dev/null
+++ b/arch/x86/kernel/perf_regs.c
@@ -0,0 +1,105 @@
1#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/perf_event.h>
5#include <linux/bug.h>
6#include <linux/stddef.h>
7#include <asm/perf_regs.h>
8#include <asm/ptrace.h>
9
10#ifdef CONFIG_X86_32
11#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
12#else
13#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
14#endif
15
16#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
17
18static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
19 PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
20 PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
21 PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
22 PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
23 PT_REGS_OFFSET(PERF_REG_X86_SI, si),
24 PT_REGS_OFFSET(PERF_REG_X86_DI, di),
25 PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
26 PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
27 PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
28 PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
29 PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
30 PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
31#ifdef CONFIG_X86_32
32 PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
33 PT_REGS_OFFSET(PERF_REG_X86_ES, es),
34 PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
35 PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
36#else
37 /*
38 * The pt_regs struct does not store
39 * ds, es, fs, gs in 64 bit mode.
40 */
41 (unsigned int) -1,
42 (unsigned int) -1,
43 (unsigned int) -1,
44 (unsigned int) -1,
45#endif
46#ifdef CONFIG_X86_64
47 PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
48 PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
49 PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
50 PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
51 PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
52 PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
53 PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
54 PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
55#endif
56};
57
58u64 perf_reg_value(struct pt_regs *regs, int idx)
59{
60 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
61 return 0;
62
63 return regs_get_register(regs, pt_regs_offset[idx]);
64}
65
66#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
67
68#ifdef CONFIG_X86_32
69int perf_reg_validate(u64 mask)
70{
71 if (!mask || mask & REG_RESERVED)
72 return -EINVAL;
73
74 return 0;
75}
76
77u64 perf_reg_abi(struct task_struct *task)
78{
79 return PERF_SAMPLE_REGS_ABI_32;
80}
81#else /* CONFIG_X86_64 */
82#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
83 (1ULL << PERF_REG_X86_ES) | \
84 (1ULL << PERF_REG_X86_FS) | \
85 (1ULL << PERF_REG_X86_GS))
86
87int perf_reg_validate(u64 mask)
88{
89 if (!mask || mask & REG_RESERVED)
90 return -EINVAL;
91
92 if (mask & REG_NOSUPPORT)
93 return -EINVAL;
94
95 return 0;
96}
97
98u64 perf_reg_abi(struct task_struct *task)
99{
100 if (test_tsk_thread_flag(task, TIF_IA32))
101 return PERF_SAMPLE_REGS_ABI_32;
102 else
103 return PERF_SAMPLE_REGS_ABI_64;
104}
105#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 0bc72e2069e3..d5f15c3f7b25 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev)
150 return oprom; 150 return oprom;
151} 151}
152 152
153void *pci_map_biosrom(struct pci_dev *pdev) 153void __iomem *pci_map_biosrom(struct pci_dev *pdev)
154{ 154{
155 struct resource *oprom = find_oprom(pdev); 155 struct resource *oprom = find_oprom(pdev);
156 156
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ef6a8456f719..dc3567e083f9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
66{ 66{
67 int ret; 67 int ret;
68 68
69 unlazy_fpu(src);
70
71 *dst = *src; 69 *dst = *src;
72 if (fpu_allocated(&src->thread.fpu)) { 70 if (fpu_allocated(&src->thread.fpu)) {
73 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); 71 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
74 ret = fpu_alloc(&dst->thread.fpu); 72 ret = fpu_alloc(&dst->thread.fpu);
75 if (ret) 73 if (ret)
76 return ret; 74 return ret;
77 fpu_copy(&dst->thread.fpu, &src->thread.fpu); 75 fpu_copy(dst, src);
78 } 76 }
79 return 0; 77 return 0;
80} 78}
@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
97 SLAB_PANIC | SLAB_NOTRACK, NULL); 95 SLAB_PANIC | SLAB_NOTRACK, NULL);
98} 96}
99 97
100static inline void drop_fpu(struct task_struct *tsk)
101{
102 /*
103 * Forget coprocessor state..
104 */
105 tsk->fpu_counter = 0;
106 clear_fpu(tsk);
107 clear_used_math();
108}
109
110/* 98/*
111 * Free current thread data structures etc.. 99 * Free current thread data structures etc..
112 */ 100 */
@@ -163,7 +151,13 @@ void flush_thread(void)
163 151
164 flush_ptrace_hw_breakpoint(tsk); 152 flush_ptrace_hw_breakpoint(tsk);
165 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 153 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
166 drop_fpu(tsk); 154 drop_init_fpu(tsk);
155 /*
156 * Free the FPU state for non xsave platforms. They get reallocated
157 * lazily at the first use.
158 */
159 if (!use_eager_fpu())
160 free_thread_xstate(tsk);
167} 161}
168 162
169static void hard_disable_TSC(void) 163static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121b..b9ff83c7135b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
190 regs->cs = __USER_CS; 190 regs->cs = __USER_CS;
191 regs->ip = new_ip; 191 regs->ip = new_ip;
192 regs->sp = new_sp; 192 regs->sp = new_sp;
193 /*
194 * Free the old FP and other extended state
195 */
196 free_thread_xstate(current);
197} 193}
198EXPORT_SYMBOL_GPL(start_thread); 194EXPORT_SYMBOL_GPL(start_thread);
199 195
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb8..8a6d20ce1978 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
232 regs->cs = _cs; 232 regs->cs = _cs;
233 regs->ss = _ss; 233 regs->ss = _ss;
234 regs->flags = X86_EFLAGS_IF; 234 regs->flags = X86_EFLAGS_IF;
235 /*
236 * Free the old FP and other extended state
237 */
238 free_thread_xstate(current);
239} 235}
240 236
241void 237void
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0f..b00b33a18390 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
21#include <linux/signal.h> 21#include <linux/signal.h>
22#include <linux/perf_event.h> 22#include <linux/perf_event.h>
23#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
24#include <linux/rcupdate.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include <asm/pgtable.h> 27#include <asm/pgtable.h>
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
1332#define genregs32_get genregs_get 1333#define genregs32_get genregs_get
1333#define genregs32_set genregs_set 1334#define genregs32_set genregs_set
1334 1335
1335#define user_i387_ia32_struct user_i387_struct
1336#define user32_fxsr_struct user_fxsr_struct
1337
1338#endif /* CONFIG_X86_64 */ 1336#endif /* CONFIG_X86_64 */
1339 1337
1340#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 1338#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs)
1463{ 1461{
1464 long ret = 0; 1462 long ret = 0;
1465 1463
1464 rcu_user_exit();
1465
1466 /* 1466 /*
1467 * If we stepped into a sysenter/syscall insn, it trapped in 1467 * If we stepped into a sysenter/syscall insn, it trapped in
1468 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. 1468 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs)
1526 !test_thread_flag(TIF_SYSCALL_EMU); 1526 !test_thread_flag(TIF_SYSCALL_EMU);
1527 if (step || test_thread_flag(TIF_SYSCALL_TRACE)) 1527 if (step || test_thread_flag(TIF_SYSCALL_TRACE))
1528 tracehook_report_syscall_exit(regs, step); 1528 tracehook_report_syscall_exit(regs, step);
1529
1530 rcu_user_enter();
1529} 1531}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..4f165479c453 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,9 +961,7 @@ void __init setup_arch(char **cmdline_p)
961 kvmclock_init(); 961 kvmclock_init();
962#endif 962#endif
963 963
964 x86_init.paging.pagetable_setup_start(swapper_pg_dir); 964 x86_init.paging.pagetable_init();
965 paging_init();
966 x86_init.paging.pagetable_setup_done(swapper_pg_dir);
967 965
968 if (boot_cpu_data.cpuid_level >= 0) { 966 if (boot_cpu_data.cpuid_level >= 0) {
969 /* A CPU has %cr4 if and only if it has CPUID */ 967 /* A CPU has %cr4 if and only if it has CPUID */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376e..b33144c8b309 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
114 regs->orig_ax = -1; /* disable syscall checks */ 114 regs->orig_ax = -1; /* disable syscall checks */
115 115
116 get_user_ex(buf, &sc->fpstate); 116 get_user_ex(buf, &sc->fpstate);
117 err |= restore_i387_xstate(buf);
118 117
119 get_user_ex(*pax, &sc->ax); 118 get_user_ex(*pax, &sc->ax);
120 } get_user_catch(err); 119 } get_user_catch(err);
121 120
121 err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
122
122 return err; 123 return err;
123} 124}
124 125
@@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
206 void __user **fpstate) 207 void __user **fpstate)
207{ 208{
208 /* Default to using normal stack */ 209 /* Default to using normal stack */
210 unsigned long math_size = 0;
209 unsigned long sp = regs->sp; 211 unsigned long sp = regs->sp;
212 unsigned long buf_fx = 0;
210 int onsigstack = on_sig_stack(sp); 213 int onsigstack = on_sig_stack(sp);
211 214
212#ifdef CONFIG_X86_64
213 /* redzone */ 215 /* redzone */
214 sp -= 128; 216 if (config_enabled(CONFIG_X86_64))
215#endif /* CONFIG_X86_64 */ 217 sp -= 128;
216 218
217 if (!onsigstack) { 219 if (!onsigstack) {
218 /* This is the X/Open sanctioned signal stack switching. */ 220 /* This is the X/Open sanctioned signal stack switching. */
219 if (ka->sa.sa_flags & SA_ONSTACK) { 221 if (ka->sa.sa_flags & SA_ONSTACK) {
220 if (current->sas_ss_size) 222 if (current->sas_ss_size)
221 sp = current->sas_ss_sp + current->sas_ss_size; 223 sp = current->sas_ss_sp + current->sas_ss_size;
222 } else { 224 } else if (config_enabled(CONFIG_X86_32) &&
223#ifdef CONFIG_X86_32 225 (regs->ss & 0xffff) != __USER_DS &&
224 /* This is the legacy signal stack switching. */ 226 !(ka->sa.sa_flags & SA_RESTORER) &&
225 if ((regs->ss & 0xffff) != __USER_DS && 227 ka->sa.sa_restorer) {
226 !(ka->sa.sa_flags & SA_RESTORER) && 228 /* This is the legacy signal stack switching. */
227 ka->sa.sa_restorer)
228 sp = (unsigned long) ka->sa.sa_restorer; 229 sp = (unsigned long) ka->sa.sa_restorer;
229#endif /* CONFIG_X86_32 */
230 } 230 }
231 } 231 }
232 232
233 if (used_math()) { 233 if (used_math()) {
234 sp -= sig_xstate_size; 234 sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
235#ifdef CONFIG_X86_64 235 &buf_fx, &math_size);
236 sp = round_down(sp, 64);
237#endif /* CONFIG_X86_64 */
238 *fpstate = (void __user *)sp; 236 *fpstate = (void __user *)sp;
239 } 237 }
240 238
@@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
247 if (onsigstack && !likely(on_sig_stack(sp))) 245 if (onsigstack && !likely(on_sig_stack(sp)))
248 return (void __user *)-1L; 246 return (void __user *)-1L;
249 247
250 /* save i387 state */ 248 /* save i387 and extended state */
251 if (used_math() && save_i387_xstate(*fpstate) < 0) 249 if (used_math() &&
250 save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
252 return (void __user *)-1L; 251 return (void __user *)-1L;
253 252
254 return (void __user *)sp; 253 return (void __user *)sp;
@@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
357 put_user_ex(sig, &frame->sig); 356 put_user_ex(sig, &frame->sig);
358 put_user_ex(&frame->info, &frame->pinfo); 357 put_user_ex(&frame->info, &frame->pinfo);
359 put_user_ex(&frame->uc, &frame->puc); 358 put_user_ex(&frame->uc, &frame->puc);
360 err |= copy_siginfo_to_user(&frame->info, info);
361 359
362 /* Create the ucontext. */ 360 /* Create the ucontext. */
363 if (cpu_has_xsave) 361 if (cpu_has_xsave)
@@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
369 put_user_ex(sas_ss_flags(regs->sp), 367 put_user_ex(sas_ss_flags(regs->sp),
370 &frame->uc.uc_stack.ss_flags); 368 &frame->uc.uc_stack.ss_flags);
371 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 369 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
372 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
373 regs, set->sig[0]);
374 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
375 370
376 /* Set up to return from userspace. */ 371 /* Set up to return from userspace. */
377 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 372 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
388 */ 383 */
389 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 384 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
390 } put_user_catch(err); 385 } put_user_catch(err);
386
387 err |= copy_siginfo_to_user(&frame->info, info);
388 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
389 regs, set->sig[0]);
390 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
391 391
392 if (err) 392 if (err)
393 return -EFAULT; 393 return -EFAULT;
@@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
436 put_user_ex(sas_ss_flags(regs->sp), 436 put_user_ex(sas_ss_flags(regs->sp),
437 &frame->uc.uc_stack.ss_flags); 437 &frame->uc.uc_stack.ss_flags);
438 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); 438 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
439 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
440 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
441 439
442 /* Set up to return from userspace. If provided, use a stub 440 /* Set up to return from userspace. If provided, use a stub
443 already in userspace. */ 441 already in userspace. */
@@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
450 } 448 }
451 } put_user_catch(err); 449 } put_user_catch(err);
452 450
451 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
452 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
453
453 if (err) 454 if (err)
454 return -EFAULT; 455 return -EFAULT;
455 456
@@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
474} 475}
475#endif /* CONFIG_X86_32 */ 476#endif /* CONFIG_X86_32 */
476 477
478static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
479 siginfo_t *info, compat_sigset_t *set,
480 struct pt_regs *regs)
481{
482#ifdef CONFIG_X86_X32_ABI
483 struct rt_sigframe_x32 __user *frame;
484 void __user *restorer;
485 int err = 0;
486 void __user *fpstate = NULL;
487
488 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
489
490 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
491 return -EFAULT;
492
493 if (ka->sa.sa_flags & SA_SIGINFO) {
494 if (copy_siginfo_to_user32(&frame->info, info))
495 return -EFAULT;
496 }
497
498 put_user_try {
499 /* Create the ucontext. */
500 if (cpu_has_xsave)
501 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
502 else
503 put_user_ex(0, &frame->uc.uc_flags);
504 put_user_ex(0, &frame->uc.uc_link);
505 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
506 put_user_ex(sas_ss_flags(regs->sp),
507 &frame->uc.uc_stack.ss_flags);
508 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
509 put_user_ex(0, &frame->uc.uc__pad0);
510
511 if (ka->sa.sa_flags & SA_RESTORER) {
512 restorer = ka->sa.sa_restorer;
513 } else {
514 /* could use a vstub here */
515 restorer = NULL;
516 err |= -EFAULT;
517 }
518 put_user_ex(restorer, &frame->pretcode);
519 } put_user_catch(err);
520
521 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
522 regs, set->sig[0]);
523 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
524
525 if (err)
526 return -EFAULT;
527
528 /* Set up registers for signal handler */
529 regs->sp = (unsigned long) frame;
530 regs->ip = (unsigned long) ka->sa.sa_handler;
531
532 /* We use the x32 calling convention here... */
533 regs->di = sig;
534 regs->si = (unsigned long) &frame->info;
535 regs->dx = (unsigned long) &frame->uc;
536
537 loadsegment(ds, __USER_DS);
538 loadsegment(es, __USER_DS);
539
540 regs->cs = __USER_CS;
541 regs->ss = __USER_DS;
542#endif /* CONFIG_X86_X32_ABI */
543
544 return 0;
545}
546
477#ifdef CONFIG_X86_32 547#ifdef CONFIG_X86_32
478/* 548/*
479 * Atomically swap in the new signal mask, and wait for a signal. 549 * Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +682,22 @@ static int signr_convert(int sig)
612 return sig; 682 return sig;
613} 683}
614 684
615#ifdef CONFIG_X86_32
616
617#define is_ia32 1
618#define ia32_setup_frame __setup_frame
619#define ia32_setup_rt_frame __setup_rt_frame
620
621#else /* !CONFIG_X86_32 */
622
623#ifdef CONFIG_IA32_EMULATION
624#define is_ia32 test_thread_flag(TIF_IA32)
625#else /* !CONFIG_IA32_EMULATION */
626#define is_ia32 0
627#endif /* CONFIG_IA32_EMULATION */
628
629#ifdef CONFIG_X86_X32_ABI
630#define is_x32 test_thread_flag(TIF_X32)
631
632static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
633 siginfo_t *info, compat_sigset_t *set,
634 struct pt_regs *regs);
635#else /* !CONFIG_X86_X32_ABI */
636#define is_x32 0
637#endif /* CONFIG_X86_X32_ABI */
638
639int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
640 sigset_t *set, struct pt_regs *regs);
641int ia32_setup_frame(int sig, struct k_sigaction *ka,
642 sigset_t *set, struct pt_regs *regs);
643
644#endif /* CONFIG_X86_32 */
645
646static int 685static int
647setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 686setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
648 struct pt_regs *regs) 687 struct pt_regs *regs)
649{ 688{
650 int usig = signr_convert(sig); 689 int usig = signr_convert(sig);
651 sigset_t *set = sigmask_to_save(); 690 sigset_t *set = sigmask_to_save();
691 compat_sigset_t *cset = (compat_sigset_t *) set;
652 692
653 /* Set up the stack frame */ 693 /* Set up the stack frame */
654 if (is_ia32) { 694 if (is_ia32_frame()) {
655 if (ka->sa.sa_flags & SA_SIGINFO) 695 if (ka->sa.sa_flags & SA_SIGINFO)
656 return ia32_setup_rt_frame(usig, ka, info, set, regs); 696 return ia32_setup_rt_frame(usig, ka, info, cset, regs);
657 else 697 else
658 return ia32_setup_frame(usig, ka, set, regs); 698 return ia32_setup_frame(usig, ka, cset, regs);
659#ifdef CONFIG_X86_X32_ABI 699 } else if (is_x32_frame()) {
660 } else if (is_x32) { 700 return x32_setup_rt_frame(usig, ka, info, cset, regs);
661 return x32_setup_rt_frame(usig, ka, info,
662 (compat_sigset_t *)set, regs);
663#endif
664 } else { 701 } else {
665 return __setup_rt_frame(sig, ka, info, set, regs); 702 return __setup_rt_frame(sig, ka, info, set, regs);
666 } 703 }
@@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs)
779void 816void
780do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 817do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
781{ 818{
819 rcu_user_exit();
820
782#ifdef CONFIG_X86_MCE 821#ifdef CONFIG_X86_MCE
783 /* notify userspace of pending MCEs */ 822 /* notify userspace of pending MCEs */
784 if (thread_info_flags & _TIF_MCE_NOTIFY) 823 if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +843,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
804#ifdef CONFIG_X86_32 843#ifdef CONFIG_X86_32
805 clear_thread_flag(TIF_IRET); 844 clear_thread_flag(TIF_IRET);
806#endif /* CONFIG_X86_32 */ 845#endif /* CONFIG_X86_32 */
846
847 rcu_user_enter();
807} 848}
808 849
809void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 850void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -824,72 +865,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
824} 865}
825 866
826#ifdef CONFIG_X86_X32_ABI 867#ifdef CONFIG_X86_X32_ABI
827static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
828 siginfo_t *info, compat_sigset_t *set,
829 struct pt_regs *regs)
830{
831 struct rt_sigframe_x32 __user *frame;
832 void __user *restorer;
833 int err = 0;
834 void __user *fpstate = NULL;
835
836 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
837
838 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
839 return -EFAULT;
840
841 if (ka->sa.sa_flags & SA_SIGINFO) {
842 if (copy_siginfo_to_user32(&frame->info, info))
843 return -EFAULT;
844 }
845
846 put_user_try {
847 /* Create the ucontext. */
848 if (cpu_has_xsave)
849 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
850 else
851 put_user_ex(0, &frame->uc.uc_flags);
852 put_user_ex(0, &frame->uc.uc_link);
853 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
854 put_user_ex(sas_ss_flags(regs->sp),
855 &frame->uc.uc_stack.ss_flags);
856 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
857 put_user_ex(0, &frame->uc.uc__pad0);
858 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
859 regs, set->sig[0]);
860 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
861
862 if (ka->sa.sa_flags & SA_RESTORER) {
863 restorer = ka->sa.sa_restorer;
864 } else {
865 /* could use a vstub here */
866 restorer = NULL;
867 err |= -EFAULT;
868 }
869 put_user_ex(restorer, &frame->pretcode);
870 } put_user_catch(err);
871
872 if (err)
873 return -EFAULT;
874
875 /* Set up registers for signal handler */
876 regs->sp = (unsigned long) frame;
877 regs->ip = (unsigned long) ka->sa.sa_handler;
878
879 /* We use the x32 calling convention here... */
880 regs->di = sig;
881 regs->si = (unsigned long) &frame->info;
882 regs->dx = (unsigned long) &frame->uc;
883
884 loadsegment(ds, __USER_DS);
885 loadsegment(es, __USER_DS);
886
887 regs->cs = __USER_CS;
888 regs->ss = __USER_DS;
889
890 return 0;
891}
892
893asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) 868asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
894{ 869{
895 struct rt_sigframe_x32 __user *frame; 870 struct rt_sigframe_x32 __user *frame;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7c5a8c314c02..c80a33bc528b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
665 unsigned long boot_error = 0; 665 unsigned long boot_error = 0;
666 int timeout; 666 int timeout;
667 667
668 alternatives_smp_switch(1); 668 /* Just in case we booted with a single CPU. */
669 alternatives_enable_smp();
669 670
670 idle->thread.sp = (unsigned long) (((struct pt_regs *) 671 idle->thread.sp = (unsigned long) (((struct pt_regs *)
671 (THREAD_SIZE + task_stack_page(idle))) - 1); 672 (THREAD_SIZE + task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
1053 preempt_enable(); 1054 preempt_enable();
1054} 1055}
1055 1056
1056void arch_disable_nonboot_cpus_begin(void)
1057{
1058 /*
1059 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
1060 * In the suspend path, we will be back in the SMP mode shortly anyways.
1061 */
1062 skip_smp_alternatives = true;
1063}
1064
1065void arch_disable_nonboot_cpus_end(void)
1066{
1067 skip_smp_alternatives = false;
1068}
1069
1070void arch_enable_nonboot_cpus_begin(void) 1057void arch_enable_nonboot_cpus_begin(void)
1071{ 1058{
1072 set_mtrr_aps_delayed_init(); 1059 set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
1256 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1243 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1257 if (system_state == SYSTEM_RUNNING) 1244 if (system_state == SYSTEM_RUNNING)
1258 pr_info("CPU %u is now offline\n", cpu); 1245 pr_info("CPU %u is now offline\n", cpu);
1259
1260 if (1 == num_online_cpus())
1261 alternatives_smp_switch(0);
1262 return; 1246 return;
1263 } 1247 }
1264 msleep(100); 1248 msleep(100);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c346d1161488..cd3b2438a980 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
157 return 1; 157 return 1;
158} 158}
159 159
160void set_task_blockstep(struct task_struct *task, bool on)
161{
162 unsigned long debugctl;
163
164 /*
165 * Ensure irq/preemption can't change debugctl in between.
166 * Note also that both TIF_BLOCKSTEP and debugctl should
167 * be changed atomically wrt preemption.
168 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
169 * wrong if task != current, SIGKILL can wakeup the stopped
170 * tracee and set/clear can play with the running task, this
171 * can confuse the next __switch_to_xtra().
172 */
173 local_irq_disable();
174 debugctl = get_debugctlmsr();
175 if (on) {
176 debugctl |= DEBUGCTLMSR_BTF;
177 set_tsk_thread_flag(task, TIF_BLOCKSTEP);
178 } else {
179 debugctl &= ~DEBUGCTLMSR_BTF;
180 clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
181 }
182 if (task == current)
183 update_debugctlmsr(debugctl);
184 local_irq_enable();
185}
186
160/* 187/*
161 * Enable single or block step. 188 * Enable single or block step.
162 */ 189 */
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
169 * So no one should try to use debugger block stepping in a program 196 * So no one should try to use debugger block stepping in a program
170 * that uses user-mode single stepping itself. 197 * that uses user-mode single stepping itself.
171 */ 198 */
172 if (enable_single_step(child) && block) { 199 if (enable_single_step(child) && block)
173 unsigned long debugctl = get_debugctlmsr(); 200 set_task_blockstep(child, true);
174 201 else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
175 debugctl |= DEBUGCTLMSR_BTF; 202 set_task_blockstep(child, false);
176 update_debugctlmsr(debugctl);
177 set_tsk_thread_flag(child, TIF_BLOCKSTEP);
178 } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
179 unsigned long debugctl = get_debugctlmsr();
180
181 debugctl &= ~DEBUGCTLMSR_BTF;
182 update_debugctlmsr(debugctl);
183 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
184 }
185} 203}
186 204
187void user_enable_single_step(struct task_struct *child) 205void user_enable_single_step(struct task_struct *child)
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
199 /* 217 /*
200 * Make sure block stepping (BTF) is disabled. 218 * Make sure block stepping (BTF) is disabled.
201 */ 219 */
202 if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { 220 if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
203 unsigned long debugctl = get_debugctlmsr(); 221 set_task_blockstep(child, false);
204
205 debugctl &= ~DEBUGCTLMSR_BTF;
206 update_debugctlmsr(debugctl);
207 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
208 }
209 222
210 /* Always clear TIF_SINGLESTEP... */ 223 /* Always clear TIF_SINGLESTEP... */
211 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 224 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..8276dc6794cc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
55#include <asm/i387.h> 55#include <asm/i387.h>
56#include <asm/fpu-internal.h> 56#include <asm/fpu-internal.h>
57#include <asm/mce.h> 57#include <asm/mce.h>
58#include <asm/rcu.h>
58 59
59#include <asm/mach_traps.h> 60#include <asm/mach_traps.h>
60 61
@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
107 dec_preempt_count(); 108 dec_preempt_count();
108} 109}
109 110
110static void __kprobes 111static int __kprobes
111do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, 112do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
112 long error_code, siginfo_t *info) 113 struct pt_regs *regs, long error_code)
113{ 114{
114 struct task_struct *tsk = current;
115
116#ifdef CONFIG_X86_32 115#ifdef CONFIG_X86_32
117 if (regs->flags & X86_VM_MASK) { 116 if (regs->flags & X86_VM_MASK) {
118 /* 117 /*
119 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. 118 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
120 * On nmi (interrupt 2), do_trap should not be called. 119 * On nmi (interrupt 2), do_trap should not be called.
121 */ 120 */
122 if (trapnr < X86_TRAP_UD) 121 if (trapnr < X86_TRAP_UD) {
123 goto vm86_trap; 122 if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
124 goto trap_signal; 123 error_code, trapnr))
124 return 0;
125 }
126 return -1;
125 } 127 }
126#endif 128#endif
129 if (!user_mode(regs)) {
130 if (!fixup_exception(regs)) {
131 tsk->thread.error_code = error_code;
132 tsk->thread.trap_nr = trapnr;
133 die(str, regs, error_code);
134 }
135 return 0;
136 }
127 137
128 if (!user_mode(regs)) 138 return -1;
129 goto kernel_trap; 139}
130 140
131#ifdef CONFIG_X86_32 141static void __kprobes
132trap_signal: 142do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
133#endif 143 long error_code, siginfo_t *info)
144{
145 struct task_struct *tsk = current;
146
147
148 if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
149 return;
134 /* 150 /*
135 * We want error_code and trap_nr set for userspace faults and 151 * We want error_code and trap_nr set for userspace faults and
136 * kernelspace faults which result in die(), but not 152 * kernelspace faults which result in die(), but not
@@ -158,33 +174,20 @@ trap_signal:
158 force_sig_info(signr, info, tsk); 174 force_sig_info(signr, info, tsk);
159 else 175 else
160 force_sig(signr, tsk); 176 force_sig(signr, tsk);
161 return;
162
163kernel_trap:
164 if (!fixup_exception(regs)) {
165 tsk->thread.error_code = error_code;
166 tsk->thread.trap_nr = trapnr;
167 die(str, regs, error_code);
168 }
169 return;
170
171#ifdef CONFIG_X86_32
172vm86_trap:
173 if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
174 error_code, trapnr))
175 goto trap_signal;
176 return;
177#endif
178} 177}
179 178
180#define DO_ERROR(trapnr, signr, str, name) \ 179#define DO_ERROR(trapnr, signr, str, name) \
181dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 180dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
182{ \ 181{ \
183 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 182 exception_enter(regs); \
184 == NOTIFY_STOP) \ 183 if (notify_die(DIE_TRAP, str, regs, error_code, \
184 trapnr, signr) == NOTIFY_STOP) { \
185 exception_exit(regs); \
185 return; \ 186 return; \
187 } \
186 conditional_sti(regs); \ 188 conditional_sti(regs); \
187 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 189 do_trap(trapnr, signr, str, regs, error_code, NULL); \
190 exception_exit(regs); \
188} 191}
189 192
190#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 193#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
195 info.si_errno = 0; \ 198 info.si_errno = 0; \
196 info.si_code = sicode; \ 199 info.si_code = sicode; \
197 info.si_addr = (void __user *)siaddr; \ 200 info.si_addr = (void __user *)siaddr; \
198 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 201 exception_enter(regs); \
199 == NOTIFY_STOP) \ 202 if (notify_die(DIE_TRAP, str, regs, error_code, \
203 trapnr, signr) == NOTIFY_STOP) { \
204 exception_exit(regs); \
200 return; \ 205 return; \
206 } \
201 conditional_sti(regs); \ 207 conditional_sti(regs); \
202 do_trap(trapnr, signr, str, regs, error_code, &info); \ 208 do_trap(trapnr, signr, str, regs, error_code, &info); \
209 exception_exit(regs); \
203} 210}
204 211
205DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, 212DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
222/* Runs on IST stack */ 229/* Runs on IST stack */
223dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) 230dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
224{ 231{
232 exception_enter(regs);
225 if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 233 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
226 X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) 234 X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
227 return; 235 preempt_conditional_sti(regs);
228 preempt_conditional_sti(regs); 236 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
229 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); 237 preempt_conditional_cli(regs);
230 preempt_conditional_cli(regs); 238 }
239 exception_exit(regs);
231} 240}
232 241
233dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) 242dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
235 static const char str[] = "double fault"; 244 static const char str[] = "double fault";
236 struct task_struct *tsk = current; 245 struct task_struct *tsk = current;
237 246
247 exception_enter(regs);
238 /* Return not checked because double fault cannot be ignored */ 248 /* Return not checked because double fault cannot be ignored */
239 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 249 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
240 250
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
255{ 265{
256 struct task_struct *tsk; 266 struct task_struct *tsk;
257 267
268 exception_enter(regs);
258 conditional_sti(regs); 269 conditional_sti(regs);
259 270
260#ifdef CONFIG_X86_32 271#ifdef CONFIG_X86_32
261 if (regs->flags & X86_VM_MASK) 272 if (regs->flags & X86_VM_MASK) {
262 goto gp_in_vm86; 273 local_irq_enable();
274 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
275 goto exit;
276 }
263#endif 277#endif
264 278
265 tsk = current; 279 tsk = current;
266 if (!user_mode(regs)) 280 if (!user_mode(regs)) {
267 goto gp_in_kernel; 281 if (fixup_exception(regs))
282 goto exit;
283
284 tsk->thread.error_code = error_code;
285 tsk->thread.trap_nr = X86_TRAP_GP;
286 if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
287 X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
288 die("general protection fault", regs, error_code);
289 goto exit;
290 }
268 291
269 tsk->thread.error_code = error_code; 292 tsk->thread.error_code = error_code;
270 tsk->thread.trap_nr = X86_TRAP_GP; 293 tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
279 } 302 }
280 303
281 force_sig(SIGSEGV, tsk); 304 force_sig(SIGSEGV, tsk);
282 return; 305exit:
283 306 exception_exit(regs);
284#ifdef CONFIG_X86_32
285gp_in_vm86:
286 local_irq_enable();
287 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
288 return;
289#endif
290
291gp_in_kernel:
292 if (fixup_exception(regs))
293 return;
294
295 tsk->thread.error_code = error_code;
296 tsk->thread.trap_nr = X86_TRAP_GP;
297 if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
298 X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
299 return;
300 die("general protection fault", regs, error_code);
301} 307}
302 308
303/* May run on IST stack. */ 309/* May run on IST stack. */
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
312 ftrace_int3_handler(regs)) 318 ftrace_int3_handler(regs))
313 return; 319 return;
314#endif 320#endif
321 exception_enter(regs);
315#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 322#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
316 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 323 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
317 SIGTRAP) == NOTIFY_STOP) 324 SIGTRAP) == NOTIFY_STOP)
318 return; 325 goto exit;
319#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 326#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
320 327
321 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 328 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
322 SIGTRAP) == NOTIFY_STOP) 329 SIGTRAP) == NOTIFY_STOP)
323 return; 330 goto exit;
324 331
325 /* 332 /*
326 * Let others (NMI) know that the debug stack is in use 333 * Let others (NMI) know that the debug stack is in use
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
331 do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); 338 do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
332 preempt_conditional_cli(regs); 339 preempt_conditional_cli(regs);
333 debug_stack_usage_dec(); 340 debug_stack_usage_dec();
341exit:
342 exception_exit(regs);
334} 343}
335 344
336#ifdef CONFIG_X86_64 345#ifdef CONFIG_X86_64
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
391 unsigned long dr6; 400 unsigned long dr6;
392 int si_code; 401 int si_code;
393 402
403 exception_enter(regs);
404
394 get_debugreg(dr6, 6); 405 get_debugreg(dr6, 6);
395 406
396 /* Filter out all the reserved bits which are preset to 1 */ 407 /* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
406 417
407 /* Catch kmemcheck conditions first of all! */ 418 /* Catch kmemcheck conditions first of all! */
408 if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) 419 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
409 return; 420 goto exit;
410 421
411 /* DR6 may or may not be cleared by the CPU */ 422 /* DR6 may or may not be cleared by the CPU */
412 set_debugreg(0, 6); 423 set_debugreg(0, 6);
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
421 432
422 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, 433 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
423 SIGTRAP) == NOTIFY_STOP) 434 SIGTRAP) == NOTIFY_STOP)
424 return; 435 goto exit;
425 436
426 /* 437 /*
427 * Let others (NMI) know that the debug stack is in use 438 * Let others (NMI) know that the debug stack is in use
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
437 X86_TRAP_DB); 448 X86_TRAP_DB);
438 preempt_conditional_cli(regs); 449 preempt_conditional_cli(regs);
439 debug_stack_usage_dec(); 450 debug_stack_usage_dec();
440 return; 451 goto exit;
441 } 452 }
442 453
443 /* 454 /*
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
458 preempt_conditional_cli(regs); 469 preempt_conditional_cli(regs);
459 debug_stack_usage_dec(); 470 debug_stack_usage_dec();
460 471
461 return; 472exit:
473 exception_exit(regs);
462} 474}
463 475
464/* 476/*
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
555#ifdef CONFIG_X86_32 567#ifdef CONFIG_X86_32
556 ignore_fpu_irq = 1; 568 ignore_fpu_irq = 1;
557#endif 569#endif
558 570 exception_enter(regs);
559 math_error(regs, error_code, X86_TRAP_MF); 571 math_error(regs, error_code, X86_TRAP_MF);
572 exception_exit(regs);
560} 573}
561 574
562dotraplinkage void 575dotraplinkage void
563do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 576do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
564{ 577{
578 exception_enter(regs);
565 math_error(regs, error_code, X86_TRAP_XF); 579 math_error(regs, error_code, X86_TRAP_XF);
580 exception_exit(regs);
566} 581}
567 582
568dotraplinkage void 583dotraplinkage void
@@ -613,11 +628,12 @@ void math_state_restore(void)
613 } 628 }
614 629
615 __thread_fpu_begin(tsk); 630 __thread_fpu_begin(tsk);
631
616 /* 632 /*
617 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 633 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
618 */ 634 */
619 if (unlikely(restore_fpu_checking(tsk))) { 635 if (unlikely(restore_fpu_checking(tsk))) {
620 __thread_fpu_end(tsk); 636 drop_init_fpu(tsk);
621 force_sig(SIGSEGV, tsk); 637 force_sig(SIGSEGV, tsk);
622 return; 638 return;
623 } 639 }
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore);
629dotraplinkage void __kprobes 645dotraplinkage void __kprobes
630do_device_not_available(struct pt_regs *regs, long error_code) 646do_device_not_available(struct pt_regs *regs, long error_code)
631{ 647{
648 exception_enter(regs);
649 BUG_ON(use_eager_fpu());
650
632#ifdef CONFIG_MATH_EMULATION 651#ifdef CONFIG_MATH_EMULATION
633 if (read_cr0() & X86_CR0_EM) { 652 if (read_cr0() & X86_CR0_EM) {
634 struct math_emu_info info = { }; 653 struct math_emu_info info = { };
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
637 656
638 info.regs = regs; 657 info.regs = regs;
639 math_emulate(&info); 658 math_emulate(&info);
659 exception_exit(regs);
640 return; 660 return;
641 } 661 }
642#endif 662#endif
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
644#ifdef CONFIG_X86_32 664#ifdef CONFIG_X86_32
645 conditional_sti(regs); 665 conditional_sti(regs);
646#endif 666#endif
667 exception_exit(regs);
647} 668}
648 669
649#ifdef CONFIG_X86_32 670#ifdef CONFIG_X86_32
650dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) 671dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
651{ 672{
652 siginfo_t info; 673 siginfo_t info;
674
675 exception_enter(regs);
653 local_irq_enable(); 676 local_irq_enable();
654 677
655 info.si_signo = SIGILL; 678 info.si_signo = SIGILL;
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
657 info.si_code = ILL_BADSTK; 680 info.si_code = ILL_BADSTK;
658 info.si_addr = NULL; 681 info.si_addr = NULL;
659 if (notify_die(DIE_TRAP, "iret exception", regs, error_code, 682 if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
660 X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) 683 X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
661 return; 684 do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
662 do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, 685 &info);
663 &info); 686 }
687 exception_exit(regs);
664} 688}
665#endif 689#endif
666 690
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd42091fa7..9538f00827a9 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,6 +41,9 @@
41/* Adjust the return address of a call insn */ 41/* Adjust the return address of a call insn */
42#define UPROBE_FIX_CALL 0x2 42#define UPROBE_FIX_CALL 0x2
43 43
44/* Instruction will modify TF, don't change it */
45#define UPROBE_FIX_SETF 0x4
46
44#define UPROBE_FIX_RIP_AX 0x8000 47#define UPROBE_FIX_RIP_AX 0x8000
45#define UPROBE_FIX_RIP_CX 0x4000 48#define UPROBE_FIX_RIP_CX 0x4000
46 49
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
239 insn_get_opcode(insn); /* should be a nop */ 242 insn_get_opcode(insn); /* should be a nop */
240 243
241 switch (OPCODE1(insn)) { 244 switch (OPCODE1(insn)) {
245 case 0x9d:
246 /* popf */
247 auprobe->fixups |= UPROBE_FIX_SETF;
248 break;
242 case 0xc3: /* ret/lret */ 249 case 0xc3: /* ret/lret */
243 case 0xcb: 250 case 0xcb:
244 case 0xc2: 251 case 0xc2:
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
646 * Skip these instructions as per the currently known x86 ISA. 653 * Skip these instructions as per the currently known x86 ISA.
647 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } 654 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
648 */ 655 */
649bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) 656static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
650{ 657{
651 int i; 658 int i;
652 659
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
673 } 680 }
674 return false; 681 return false;
675} 682}
683
684bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
685{
686 bool ret = __skip_sstep(auprobe, regs);
687 if (ret && (regs->flags & X86_EFLAGS_TF))
688 send_sig(SIGTRAP, current, 0);
689 return ret;
690}
691
692void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
693{
694 struct task_struct *task = current;
695 struct arch_uprobe_task *autask = &task->utask->autask;
696 struct pt_regs *regs = task_pt_regs(task);
697
698 autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
699
700 regs->flags |= X86_EFLAGS_TF;
701 if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
702 set_task_blockstep(task, false);
703}
704
705void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
706{
707 struct task_struct *task = current;
708 struct arch_uprobe_task *autask = &task->utask->autask;
709 bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
710 struct pt_regs *regs = task_pt_regs(task);
711 /*
712 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
713 * SIGTRAP if we do not clear TF. We need to examine the opcode to
714 * make it right.
715 */
716 if (unlikely(trapped)) {
717 if (!autask->saved_tf)
718 regs->flags &= ~X86_EFLAGS_TF;
719 } else {
720 if (autask->saved_tf)
721 send_sig(SIGTRAP, task, 0);
722 else if (!(auprobe->fixups & UPROBE_FIX_SETF))
723 regs->flags &= ~X86_EFLAGS_TF;
724 }
725}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 6020f6f5927c..1330dd102950 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
13#include <asm/ftrace.h> 13#include <asm/ftrace.h>
14 14
15#ifdef CONFIG_FUNCTION_TRACER 15#ifdef CONFIG_FUNCTION_TRACER
16/* mcount is defined in assembly */ 16/* mcount and __fentry__ are defined in assembly */
17#ifdef CC_USING_FENTRY
18EXPORT_SYMBOL(__fentry__);
19#else
17EXPORT_SYMBOL(mcount); 20EXPORT_SYMBOL(mcount);
18#endif 21#endif
22#endif
19 23
20EXPORT_SYMBOL(__get_user_1); 24EXPORT_SYMBOL(__get_user_1);
21EXPORT_SYMBOL(__get_user_2); 25EXPORT_SYMBOL(__get_user_2);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9f3167e891ef..7a3d075a814a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
26 26
27void __cpuinit x86_init_noop(void) { } 27void __cpuinit x86_init_noop(void) { }
28void __init x86_init_uint_noop(unsigned int unused) { } 28void __init x86_init_uint_noop(unsigned int unused) { }
29void __init x86_init_pgd_noop(pgd_t *unused) { }
30int __init iommu_init_noop(void) { return 0; } 29int __init iommu_init_noop(void) { return 0; }
31void iommu_shutdown_noop(void) { } 30void iommu_shutdown_noop(void) { }
32 31
@@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = {
68 }, 67 },
69 68
70 .paging = { 69 .paging = {
71 .pagetable_setup_start = native_pagetable_setup_start, 70 .pagetable_init = native_pagetable_init,
72 .pagetable_setup_done = native_pagetable_setup_done,
73 }, 71 },
74 72
75 .timers = { 73 .timers = {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 3d3e20709119..ada87a329edc 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -10,9 +10,7 @@
10#include <linux/compat.h> 10#include <linux/compat.h>
11#include <asm/i387.h> 11#include <asm/i387.h>
12#include <asm/fpu-internal.h> 12#include <asm/fpu-internal.h>
13#ifdef CONFIG_IA32_EMULATION 13#include <asm/sigframe.h>
14#include <asm/sigcontext32.h>
15#endif
16#include <asm/xcr.h> 14#include <asm/xcr.h>
17 15
18/* 16/*
@@ -23,13 +21,9 @@ u64 pcntxt_mask;
23/* 21/*
24 * Represents init state for the supported extended state. 22 * Represents init state for the supported extended state.
25 */ 23 */
26static struct xsave_struct *init_xstate_buf; 24struct xsave_struct *init_xstate_buf;
27
28struct _fpx_sw_bytes fx_sw_reserved;
29#ifdef CONFIG_IA32_EMULATION
30struct _fpx_sw_bytes fx_sw_reserved_ia32;
31#endif
32 25
26static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
33static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; 27static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
34 28
35/* 29/*
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
44 */ 38 */
45void __sanitize_i387_state(struct task_struct *tsk) 39void __sanitize_i387_state(struct task_struct *tsk)
46{ 40{
47 u64 xstate_bv;
48 int feature_bit = 0x2;
49 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; 41 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
42 int feature_bit = 0x2;
43 u64 xstate_bv;
50 44
51 if (!fx) 45 if (!fx)
52 return; 46 return;
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk)
104 * Check for the presence of extended state information in the 98 * Check for the presence of extended state information in the
105 * user fpstate pointer in the sigcontext. 99 * user fpstate pointer in the sigcontext.
106 */ 100 */
107int check_for_xstate(struct i387_fxsave_struct __user *buf, 101static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
108 void __user *fpstate, 102 void __user *fpstate,
109 struct _fpx_sw_bytes *fx_sw_user) 103 struct _fpx_sw_bytes *fx_sw)
110{ 104{
111 int min_xstate_size = sizeof(struct i387_fxsave_struct) + 105 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
112 sizeof(struct xsave_hdr_struct); 106 sizeof(struct xsave_hdr_struct);
113 unsigned int magic2; 107 unsigned int magic2;
114 int err;
115 108
116 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], 109 if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
117 sizeof(struct _fpx_sw_bytes)); 110 return -1;
118 if (err)
119 return -EFAULT;
120 111
121 /* 112 /* Check for the first magic field and other error scenarios. */
122 * First Magic check failed. 113 if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
123 */ 114 fx_sw->xstate_size < min_xstate_size ||
124 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) 115 fx_sw->xstate_size > xstate_size ||
125 return -EINVAL; 116 fx_sw->xstate_size > fx_sw->extended_size)
117 return -1;
126 118
127 /* 119 /*
128 * Check for error scenarios.
129 */
130 if (fx_sw_user->xstate_size < min_xstate_size ||
131 fx_sw_user->xstate_size > xstate_size ||
132 fx_sw_user->xstate_size > fx_sw_user->extended_size)
133 return -EINVAL;
134
135 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
136 fx_sw_user->extended_size -
137 FP_XSTATE_MAGIC2_SIZE));
138 if (err)
139 return err;
140 /*
141 * Check for the presence of second magic word at the end of memory 120 * Check for the presence of second magic word at the end of memory
142 * layout. This detects the case where the user just copied the legacy 121 * layout. This detects the case where the user just copied the legacy
143 * fpstate layout without copying the extended state information 122 * fpstate layout without copying the extended state information
144 * in the memory layout. 123 * in the memory layout.
145 */ 124 */
146 if (magic2 != FP_XSTATE_MAGIC2) 125 if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
147 return -EFAULT; 126 || magic2 != FP_XSTATE_MAGIC2)
127 return -1;
148 128
149 return 0; 129 return 0;
150} 130}
151 131
152#ifdef CONFIG_X86_64
153/* 132/*
154 * Signal frame handlers. 133 * Signal frame handlers.
155 */ 134 */
156 135static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
157int save_i387_xstate(void __user *buf)
158{ 136{
159 struct task_struct *tsk = current; 137 if (use_fxsr()) {
160 int err = 0; 138 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
161 139 struct user_i387_ia32_struct env;
162 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) 140 struct _fpstate_ia32 __user *fp = buf;
163 return -EACCES;
164 141
165 BUG_ON(sig_xstate_size < xstate_size); 142 convert_from_fxsr(&env, tsk);
166 143
167 if ((unsigned long)buf % 64) 144 if (__copy_to_user(buf, &env, sizeof(env)) ||
168 pr_err("%s: bad fpstate %p\n", __func__, buf); 145 __put_user(xsave->i387.swd, &fp->status) ||
169 146 __put_user(X86_FXSR_MAGIC, &fp->magic))
170 if (!used_math()) 147 return -1;
171 return 0;
172
173 if (user_has_fpu()) {
174 if (use_xsave())
175 err = xsave_user(buf);
176 else
177 err = fxsave_user(buf);
178
179 if (err)
180 return err;
181 user_fpu_end();
182 } else { 148 } else {
183 sanitize_i387_state(tsk); 149 struct i387_fsave_struct __user *fp = buf;
184 if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, 150 u32 swd;
185 xstate_size)) 151 if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
186 return -1; 152 return -1;
187 } 153 }
188 154
189 clear_used_math(); /* trigger finit */ 155 return 0;
156}
190 157
191 if (use_xsave()) { 158static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
192 struct _fpstate __user *fx = buf; 159{
193 struct _xstate __user *x = buf; 160 struct xsave_struct __user *x = buf;
194 u64 xstate_bv; 161 struct _fpx_sw_bytes *sw_bytes;
162 u32 xstate_bv;
163 int err;
195 164
196 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, 165 /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
197 sizeof(struct _fpx_sw_bytes)); 166 sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
167 err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
198 168
199 err |= __put_user(FP_XSTATE_MAGIC2, 169 if (!use_xsave())
200 (__u32 __user *) (buf + sig_xstate_size 170 return err;
201 - FP_XSTATE_MAGIC2_SIZE));
202 171
203 /* 172 err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
204 * Read the xstate_bv which we copied (directly from the cpu or
205 * from the state in task struct) to the user buffers and
206 * set the FP/SSE bits.
207 */
208 err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
209 173
210 /* 174 /*
211 * For legacy compatible, we always set FP/SSE bits in the bit 175 * Read the xstate_bv which we copied (directly from the cpu or
212 * vector while saving the state to the user context. This will 176 * from the state in task struct) to the user buffers.
213 * enable us capturing any changes(during sigreturn) to 177 */
214 * the FP/SSE bits by the legacy applications which don't touch 178 err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
215 * xstate_bv in the xsave header.
216 *
217 * xsave aware apps can change the xstate_bv in the xsave
218 * header as well as change any contents in the memory layout.
219 * xrestore as part of sigreturn will capture all the changes.
220 */
221 xstate_bv |= XSTATE_FPSSE;
222 179
223 err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); 180 /*
181 * For legacy compatibility, we always set FP/SSE bits in the bit
182 * vector while saving the state to the user context. This will
183 * enable us capturing any changes(during sigreturn) to
184 * the FP/SSE bits by the legacy applications which don't touch
185 * xstate_bv in the xsave header.
186 *
187 * xsave aware apps can change the xstate_bv in the xsave
188 * header as well as change any contents in the memory layout.
189 * xrestore as part of sigreturn will capture all the changes.
190 */
191 xstate_bv |= XSTATE_FPSSE;
224 192
225 if (err) 193 err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
226 return err;
227 }
228 194
229 return 1; 195 return err;
196}
197
198static inline int save_user_xstate(struct xsave_struct __user *buf)
199{
200 int err;
201
202 if (use_xsave())
203 err = xsave_user(buf);
204 else if (use_fxsr())
205 err = fxsave_user((struct i387_fxsave_struct __user *) buf);
206 else
207 err = fsave_user((struct i387_fsave_struct __user *) buf);
208
209 if (unlikely(err) && __clear_user(buf, xstate_size))
210 err = -EFAULT;
211 return err;
230} 212}
231 213
232/* 214/*
233 * Restore the extended state if present. Otherwise, restore the FP/SSE 215 * Save the fpu, extended register state to the user signal frame.
234 * state. 216 *
217 * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
218 * state is copied.
219 * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
220 *
221 * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
222 * buf != buf_fx for 32-bit frames with fxstate.
223 *
224 * If the fpu, extended register state is live, save the state directly
225 * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
226 * copy the thread's fpu state to the user frame starting at 'buf_fx'.
227 *
228 * If this is a 32-bit frame with fxstate, put a fsave header before
229 * the aligned state at 'buf_fx'.
230 *
231 * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
232 * indicating the absence/presence of the extended state to the user.
235 */ 233 */
236static int restore_user_xstate(void __user *buf) 234int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
237{ 235{
238 struct _fpx_sw_bytes fx_sw_user; 236 struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
239 u64 mask; 237 struct task_struct *tsk = current;
240 int err; 238 int ia32_fxstate = (buf != buf_fx);
241 239
242 if (((unsigned long)buf % 64) || 240 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
243 check_for_xstate(buf, buf, &fx_sw_user)) 241 config_enabled(CONFIG_IA32_EMULATION));
244 goto fx_only;
245 242
246 mask = fx_sw_user.xstate_bv; 243 if (!access_ok(VERIFY_WRITE, buf, size))
244 return -EACCES;
247 245
248 /* 246 if (!HAVE_HWFP)
249 * restore the state passed by the user. 247 return fpregs_soft_get(current, NULL, 0,
250 */ 248 sizeof(struct user_i387_ia32_struct), NULL,
251 err = xrestore_user(buf, mask); 249 (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
252 if (err)
253 return err;
254 250
255 /* 251 if (user_has_fpu()) {
256 * init the state skipped by the user. 252 /* Save the live register state to the user directly. */
257 */ 253 if (save_user_xstate(buf_fx))
258 mask = pcntxt_mask & ~mask; 254 return -1;
259 if (unlikely(mask)) 255 /* Update the thread's fxstate to save the fsave header. */
260 xrstor_state(init_xstate_buf, mask); 256 if (ia32_fxstate)
257 fpu_fxsave(&tsk->thread.fpu);
258 } else {
259 sanitize_i387_state(tsk);
260 if (__copy_to_user(buf_fx, xsave, xstate_size))
261 return -1;
262 }
263
264 /* Save the fsave header for the 32-bit frames. */
265 if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
266 return -1;
267
268 if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
269 return -1;
270
271 drop_init_fpu(tsk); /* trigger finit */
261 272
262 return 0; 273 return 0;
274}
263 275
264fx_only: 276static inline void
265 /* 277sanitize_restored_xstate(struct task_struct *tsk,
266 * couldn't find the extended state information in the 278 struct user_i387_ia32_struct *ia32_env,
267 * memory layout. Restore just the FP/SSE and init all 279 u64 xstate_bv, int fx_only)
268 * the other extended state. 280{
269 */ 281 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
270 xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); 282 struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
271 return fxrstor_checking((__force struct i387_fxsave_struct *)buf); 283
284 if (use_xsave()) {
285 /* These bits must be zero. */
286 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
287
288 /*
289 * Init the state that is not present in the memory
290 * layout and not enabled by the OS.
291 */
292 if (fx_only)
293 xsave_hdr->xstate_bv = XSTATE_FPSSE;
294 else
295 xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
296 }
297
298 if (use_fxsr()) {
299 /*
300 * mxcsr reserved bits must be masked to zero for security
301 * reasons.
302 */
303 xsave->i387.mxcsr &= mxcsr_feature_mask;
304
305 convert_to_fxsr(tsk, ia32_env);
306 }
272} 307}
273 308
274/* 309/*
275 * This restores directly out of user space. Exceptions are handled. 310 * Restore the extended state if present. Otherwise, restore the FP/SSE state.
276 */ 311 */
277int restore_i387_xstate(void __user *buf) 312static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
278{ 313{
314 if (use_xsave()) {
315 if ((unsigned long)buf % 64 || fx_only) {
316 u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
317 xrstor_state(init_xstate_buf, init_bv);
318 return fxrstor_user(buf);
319 } else {
320 u64 init_bv = pcntxt_mask & ~xbv;
321 if (unlikely(init_bv))
322 xrstor_state(init_xstate_buf, init_bv);
323 return xrestore_user(buf, xbv);
324 }
325 } else if (use_fxsr()) {
326 return fxrstor_user(buf);
327 } else
328 return frstor_user(buf);
329}
330
331int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
332{
333 int ia32_fxstate = (buf != buf_fx);
279 struct task_struct *tsk = current; 334 struct task_struct *tsk = current;
280 int err = 0; 335 int state_size = xstate_size;
336 u64 xstate_bv = 0;
337 int fx_only = 0;
338
339 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
340 config_enabled(CONFIG_IA32_EMULATION));
281 341
282 if (!buf) { 342 if (!buf) {
283 if (used_math()) 343 drop_init_fpu(tsk);
284 goto clear;
285 return 0; 344 return 0;
286 } else 345 }
287 if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
288 return -EACCES;
289 346
290 if (!used_math()) { 347 if (!access_ok(VERIFY_READ, buf, size))
291 err = init_fpu(tsk); 348 return -EACCES;
292 if (err) 349
293 return err; 350 if (!used_math() && init_fpu(tsk))
351 return -1;
352
353 if (!HAVE_HWFP) {
354 return fpregs_soft_set(current, NULL,
355 0, sizeof(struct user_i387_ia32_struct),
356 NULL, buf) != 0;
294 } 357 }
295 358
296 user_fpu_begin(); 359 if (use_xsave()) {
297 if (use_xsave()) 360 struct _fpx_sw_bytes fx_sw_user;
298 err = restore_user_xstate(buf); 361 if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
299 else 362 /*
300 err = fxrstor_checking((__force struct i387_fxsave_struct *) 363 * Couldn't find the extended state information in the
301 buf); 364 * memory layout. Restore just the FP/SSE and init all
302 if (unlikely(err)) { 365 * the other extended state.
366 */
367 state_size = sizeof(struct i387_fxsave_struct);
368 fx_only = 1;
369 } else {
370 state_size = fx_sw_user.xstate_size;
371 xstate_bv = fx_sw_user.xstate_bv;
372 }
373 }
374
375 if (ia32_fxstate) {
376 /*
377 * For 32-bit frames with fxstate, copy the user state to the
378 * thread's fpu state, reconstruct fxstate from the fsave
379 * header. Sanitize the copied state etc.
380 */
381 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
382 struct user_i387_ia32_struct env;
383 int err = 0;
384
385 /*
386 * Drop the current fpu which clears used_math(). This ensures
387 * that any context-switch during the copy of the new state,
388 * avoids the intermediate state from getting restored/saved.
389 * Thus avoiding the new restored state from getting corrupted.
390 * We will be ready to restore/save the state only after
391 * set_used_math() is again set.
392 */
393 drop_fpu(tsk);
394
395 if (__copy_from_user(xsave, buf_fx, state_size) ||
396 __copy_from_user(&env, buf, sizeof(env))) {
397 err = -1;
398 } else {
399 sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
400 set_used_math();
401 }
402
403 if (use_eager_fpu())
404 math_state_restore();
405
406 return err;
407 } else {
303 /* 408 /*
304 * Encountered an error while doing the restore from the 409 * For 64-bit frames and 32-bit fsave frames, restore the user
305 * user buffer, clear the fpu state. 410 * state to the registers directly (with exceptions handled).
306 */ 411 */
307clear: 412 user_fpu_begin();
308 clear_fpu(tsk); 413 if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
309 clear_used_math(); 414 drop_init_fpu(tsk);
415 return -1;
416 }
310 } 417 }
311 return err; 418
419 return 0;
312} 420}
313#endif
314 421
315/* 422/*
316 * Prepare the SW reserved portion of the fxsave memory layout, indicating 423 * Prepare the SW reserved portion of the fxsave memory layout, indicating
@@ -321,31 +428,22 @@ clear:
321 */ 428 */
322static void prepare_fx_sw_frame(void) 429static void prepare_fx_sw_frame(void)
323{ 430{
324 int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + 431 int fsave_header_size = sizeof(struct i387_fsave_struct);
325 FP_XSTATE_MAGIC2_SIZE; 432 int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
326 433
327 sig_xstate_size = sizeof(struct _fpstate) + size_extended; 434 if (config_enabled(CONFIG_X86_32))
328 435 size += fsave_header_size;
329#ifdef CONFIG_IA32_EMULATION
330 sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
331#endif
332
333 memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
334 436
335 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; 437 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
336 fx_sw_reserved.extended_size = sig_xstate_size; 438 fx_sw_reserved.extended_size = size;
337 fx_sw_reserved.xstate_bv = pcntxt_mask; 439 fx_sw_reserved.xstate_bv = pcntxt_mask;
338 fx_sw_reserved.xstate_size = xstate_size; 440 fx_sw_reserved.xstate_size = xstate_size;
339#ifdef CONFIG_IA32_EMULATION
340 memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
341 sizeof(struct _fpx_sw_bytes));
342 fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
343#endif
344}
345 441
346#ifdef CONFIG_X86_64 442 if (config_enabled(CONFIG_IA32_EMULATION)) {
347unsigned int sig_xstate_size = sizeof(struct _fpstate); 443 fx_sw_reserved_ia32 = fx_sw_reserved;
348#endif 444 fx_sw_reserved_ia32.extended_size += fsave_header_size;
445 }
446}
349 447
350/* 448/*
351 * Enable the extended processor state save/restore feature 449 * Enable the extended processor state save/restore feature
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void)
384/* 482/*
385 * setup the xstate image representing the init state 483 * setup the xstate image representing the init state
386 */ 484 */
387static void __init setup_xstate_init(void) 485static void __init setup_init_fpu_buf(void)
388{ 486{
389 setup_xstate_features();
390
391 /* 487 /*
392 * Setup init_xstate_buf to represent the init state of 488 * Setup init_xstate_buf to represent the init state of
393 * all the features managed by the xsave 489 * all the features managed by the xsave
394 */ 490 */
395 init_xstate_buf = alloc_bootmem_align(xstate_size, 491 init_xstate_buf = alloc_bootmem_align(xstate_size,
396 __alignof__(struct xsave_struct)); 492 __alignof__(struct xsave_struct));
397 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 493 fx_finit(&init_xstate_buf->i387);
494
495 if (!cpu_has_xsave)
496 return;
497
498 setup_xstate_features();
398 499
399 clts();
400 /* 500 /*
401 * Init all the features state with header_bv being 0x0 501 * Init all the features state with header_bv being 0x0
402 */ 502 */
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void)
406 * of any feature which is not represented by all zero's. 506 * of any feature which is not represented by all zero's.
407 */ 507 */
408 xsave_state(init_xstate_buf, -1); 508 xsave_state(init_xstate_buf, -1);
409 stts();
410} 509}
411 510
511static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
512static int __init eager_fpu_setup(char *s)
513{
514 if (!strcmp(s, "on"))
515 eagerfpu = ENABLE;
516 else if (!strcmp(s, "off"))
517 eagerfpu = DISABLE;
518 else if (!strcmp(s, "auto"))
519 eagerfpu = AUTO;
520 return 1;
521}
522__setup("eagerfpu=", eager_fpu_setup);
523
412/* 524/*
413 * Enable and initialize the xsave feature. 525 * Enable and initialize the xsave feature.
414 */ 526 */
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void)
445 557
446 update_regset_xstate_info(xstate_size, pcntxt_mask); 558 update_regset_xstate_info(xstate_size, pcntxt_mask);
447 prepare_fx_sw_frame(); 559 prepare_fx_sw_frame();
560 setup_init_fpu_buf();
448 561
449 setup_xstate_init(); 562 /* Auto enable eagerfpu for xsaveopt */
563 if (cpu_has_xsaveopt && eagerfpu != DISABLE)
564 eagerfpu = ENABLE;
450 565
451 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", 566 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
452 pcntxt_mask, xstate_size); 567 pcntxt_mask, xstate_size);
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void)
471 next_func = xstate_enable; 586 next_func = xstate_enable;
472 this_func(); 587 this_func();
473} 588}
589
590static inline void __init eager_fpu_init_bp(void)
591{
592 current->thread.fpu.state =
593 alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
594 if (!init_xstate_buf)
595 setup_init_fpu_buf();
596}
597
598void __cpuinit eager_fpu_init(void)
599{
600 static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
601
602 clear_used_math();
603 current_thread_info()->status = 0;
604
605 if (eagerfpu == ENABLE)
606 setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
607
608 if (!cpu_has_eager_fpu) {
609 stts();
610 return;
611 }
612
613 if (boot_func) {
614 boot_func();
615 boot_func = NULL;
616 }
617
618 /*
619 * This is same as math_state_restore(). But use_xsave() is
620 * not yet patched to use math_state_restore().
621 */
622 init_fpu(current);
623 __thread_fpu_begin(current);
624 if (cpu_has_xsave)
625 xrstor_state(init_xstate_buf, -1);
626 else
627 fxrstor_checking(&init_xstate_buf->i387);
628}