author		Arnd Bergmann <arnd@arndb.de>	2012-10-04 16:57:00 -0400
committer	Arnd Bergmann <arnd@arndb.de>	2012-10-04 16:57:51 -0400
commit		c37d6154c0b9163c27e53cc1d0be3867b4abd760 (patch)
tree		7a24522c56d1cb284dff1d3c225bbdaba0901bb5 /arch/x86/kernel
parent		e7a570ff7dff9af6e54ff5e580a61ec7652137a0 (diff)
parent		8a1ab3155c2ac7fbe5f2038d6e26efeb607a1498 (diff)
Merge branch 'disintegrate-asm-generic' of git://git.infradead.org/users/dhowells/linux-headers into asm-generic
Patches from David Howells <dhowells@redhat.com>:
This is to complete part of the UAPI disintegration for which the
preparatory patches were pulled recently.
Note that there are some fixup patches which are at the base of the
branch aimed at you, plus all arches get the asm-generic branch merged in too.
* 'disintegrate-asm-generic' of git://git.infradead.org/users/dhowells/linux-headers:
UAPI: (Scripted) Disintegrate include/asm-generic
UAPI: Fix conditional header installation handling (notably kvm_para.h on m68k)
c6x: remove c6x signal.h
UAPI: Split compound conditionals containing __KERNEL__ in Arm64
UAPI: Fix the guards on various asm/unistd.h files
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
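For context, the scripted disintegration referred to above follows the usual UAPI split pattern: the exported half of each header moves under include/uapi/ and the kernel-side file pulls it back in. The sketch below is only an illustration of that pattern — the file and symbol names are hypothetical and are not taken from this merge:

	/* include/asm-generic/example.h -- kernel-only half (hypothetical) */
	#ifndef __ASM_GENERIC_EXAMPLE_H
	#define __ASM_GENERIC_EXAMPLE_H

	#include <uapi/asm-generic/example.h>	/* exported half, installed for userspace */

	/* definitions that used to sit under #ifdef __KERNEL__ stay in this file */
	extern int example_kernel_only(void);

	#endif /* __ASM_GENERIC_EXAMPLE_H */

	/* include/uapi/asm-generic/example.h -- userspace-visible half (hypothetical) */
	#ifndef _UAPI__ASM_GENERIC_EXAMPLE_H
	#define _UAPI__ASM_GENERIC_EXAMPLE_H

	#define EXAMPLE_FLAG	0x1	/* constants and types userspace relies on */

	#endif /* _UAPI__ASM_GENERIC_EXAMPLE_H */

This is presumably also why the Arm64 patch above splits compound conditionals: a line such as "#if defined(__KERNEL__) && defined(CONFIG_COMPAT)" has to become two nested #ifdefs before a script keying on plain #ifdef __KERNEL__ blocks can divide the file cleanly.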
Diffstat (limited to 'arch/x86/kernel')
46 files changed, 1908 insertions, 1128 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | |||
100 | obj-$(CONFIG_OF) += devicetree.o | 100 | obj-$(CONFIG_OF) += devicetree.o |
101 | obj-$(CONFIG_UPROBES) += uprobes.o | 101 | obj-$(CONFIG_UPROBES) += uprobes.o |
102 | 102 | ||
103 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | ||
104 | |||
103 | ### | 105 | ### |
104 | # 64 bit specific files | 106 | # 64 bit specific files |
105 | ifeq ($(CONFIG_X86_64),y) | 107 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..e651f7a589ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | |||
656 | acpi_register_lapic(physid, ACPI_MADT_ENABLED); | 656 | acpi_register_lapic(physid, ACPI_MADT_ENABLED); |
657 | 657 | ||
658 | /* | 658 | /* |
659 | * If mp_register_lapic successfully generates a new logical cpu | 659 | * If acpi_register_lapic successfully generates a new logical cpu |
660 | * number, then the following will get us exactly what was mapped | 660 | * number, then the following will get us exactly what was mapped |
661 | */ | 661 | */ |
662 | cpumask_andnot(new_map, cpu_present_mask, tmp_map); | 662 | cpumask_andnot(new_map, cpu_present_mask, tmp_map); |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1b8e5a03d942..11676cf65aee 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void) | |||
43 | 43 | ||
44 | header->video_mode = saved_video_mode; | 44 | header->video_mode = saved_video_mode; |
45 | 45 | ||
46 | header->pmode_behavior = 0; | ||
47 | |||
46 | #ifndef CONFIG_64BIT | 48 | #ifndef CONFIG_64BIT |
47 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | 49 | store_gdt((struct desc_ptr *)&header->pmode_gdt); |
48 | 50 | ||
49 | if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, | 51 | if (!rdmsr_safe(MSR_EFER, |
50 | &header->pmode_efer_high)) | 52 | &header->pmode_efer_low, |
51 | header->pmode_efer_low = header->pmode_efer_high = 0; | 53 | &header->pmode_efer_high)) |
54 | header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER); | ||
52 | #endif /* !CONFIG_64BIT */ | 55 | #endif /* !CONFIG_64BIT */ |
53 | 56 | ||
54 | header->pmode_cr0 = read_cr0(); | 57 | header->pmode_cr0 = read_cr0(); |
55 | header->pmode_cr4 = read_cr4_safe(); | 58 | if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { |
56 | header->pmode_behavior = 0; | 59 | header->pmode_cr4 = read_cr4(); |
60 | header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); | ||
61 | } | ||
57 | if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, | 62 | if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, |
58 | &header->pmode_misc_en_low, | 63 | &header->pmode_misc_en_low, |
59 | &header->pmode_misc_en_high)) | 64 | &header->pmode_misc_en_high)) |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534baed5..ef5ccca79a6c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@ | |||
23 | 23 | ||
24 | #define MAX_PATCH_LEN (255-1) | 24 | #define MAX_PATCH_LEN (255-1) |
25 | 25 | ||
26 | #ifdef CONFIG_HOTPLUG_CPU | ||
27 | static int smp_alt_once; | ||
28 | |||
29 | static int __init bootonly(char *str) | ||
30 | { | ||
31 | smp_alt_once = 1; | ||
32 | return 1; | ||
33 | } | ||
34 | __setup("smp-alt-boot", bootonly); | ||
35 | #else | ||
36 | #define smp_alt_once 1 | ||
37 | #endif | ||
38 | |||
39 | static int __initdata_or_module debug_alternative; | 26 | static int __initdata_or_module debug_alternative; |
40 | 27 | ||
41 | static int __init debug_alt(char *str) | 28 | static int __init debug_alt(char *str) |
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, | |||
317 | /* turn DS segment override prefix into lock prefix */ | 304 | /* turn DS segment override prefix into lock prefix */ |
318 | if (*ptr == 0x3e) | 305 | if (*ptr == 0x3e) |
319 | text_poke(ptr, ((unsigned char []){0xf0}), 1); | 306 | text_poke(ptr, ((unsigned char []){0xf0}), 1); |
320 | }; | 307 | } |
321 | mutex_unlock(&text_mutex); | 308 | mutex_unlock(&text_mutex); |
322 | } | 309 | } |
323 | 310 | ||
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, | |||
326 | { | 313 | { |
327 | const s32 *poff; | 314 | const s32 *poff; |
328 | 315 | ||
329 | if (noreplace_smp) | ||
330 | return; | ||
331 | |||
332 | mutex_lock(&text_mutex); | 316 | mutex_lock(&text_mutex); |
333 | for (poff = start; poff < end; poff++) { | 317 | for (poff = start; poff < end; poff++) { |
334 | u8 *ptr = (u8 *)poff + *poff; | 318 | u8 *ptr = (u8 *)poff + *poff; |
@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, | |||
338 | /* turn lock prefix into DS segment override prefix */ | 322 | /* turn lock prefix into DS segment override prefix */ |
339 | if (*ptr == 0xf0) | 323 | if (*ptr == 0xf0) |
340 | text_poke(ptr, ((unsigned char []){0x3E}), 1); | 324 | text_poke(ptr, ((unsigned char []){0x3E}), 1); |
341 | }; | 325 | } |
342 | mutex_unlock(&text_mutex); | 326 | mutex_unlock(&text_mutex); |
343 | } | 327 | } |
344 | 328 | ||
@@ -359,7 +343,7 @@ struct smp_alt_module { | |||
359 | }; | 343 | }; |
360 | static LIST_HEAD(smp_alt_modules); | 344 | static LIST_HEAD(smp_alt_modules); |
361 | static DEFINE_MUTEX(smp_alt); | 345 | static DEFINE_MUTEX(smp_alt); |
362 | static int smp_mode = 1; /* protected by smp_alt */ | 346 | static bool uniproc_patched = false; /* protected by smp_alt */ |
363 | 347 | ||
364 | void __init_or_module alternatives_smp_module_add(struct module *mod, | 348 | void __init_or_module alternatives_smp_module_add(struct module *mod, |
365 | char *name, | 349 | char *name, |
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, | |||
368 | { | 352 | { |
369 | struct smp_alt_module *smp; | 353 | struct smp_alt_module *smp; |
370 | 354 | ||
371 | if (noreplace_smp) | 355 | mutex_lock(&smp_alt); |
372 | return; | 356 | if (!uniproc_patched) |
357 | goto unlock; | ||
373 | 358 | ||
374 | if (smp_alt_once) { | 359 | if (num_possible_cpus() == 1) |
375 | if (boot_cpu_has(X86_FEATURE_UP)) | 360 | /* Don't bother remembering, we'll never have to undo it. */ |
376 | alternatives_smp_unlock(locks, locks_end, | 361 | goto smp_unlock; |
377 | text, text_end); | ||
378 | return; | ||
379 | } | ||
380 | 362 | ||
381 | smp = kzalloc(sizeof(*smp), GFP_KERNEL); | 363 | smp = kzalloc(sizeof(*smp), GFP_KERNEL); |
382 | if (NULL == smp) | 364 | if (NULL == smp) |
383 | return; /* we'll run the (safe but slow) SMP code then ... */ | 365 | /* we'll run the (safe but slow) SMP code then ... */ |
366 | goto unlock; | ||
384 | 367 | ||
385 | smp->mod = mod; | 368 | smp->mod = mod; |
386 | smp->name = name; | 369 | smp->name = name; |
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, | |||
392 | __func__, smp->locks, smp->locks_end, | 375 | __func__, smp->locks, smp->locks_end, |
393 | smp->text, smp->text_end, smp->name); | 376 | smp->text, smp->text_end, smp->name); |
394 | 377 | ||
395 | mutex_lock(&smp_alt); | ||
396 | list_add_tail(&smp->next, &smp_alt_modules); | 378 | list_add_tail(&smp->next, &smp_alt_modules); |
397 | if (boot_cpu_has(X86_FEATURE_UP)) | 379 | smp_unlock: |
398 | alternatives_smp_unlock(smp->locks, smp->locks_end, | 380 | alternatives_smp_unlock(locks, locks_end, text, text_end); |
399 | smp->text, smp->text_end); | 381 | unlock: |
400 | mutex_unlock(&smp_alt); | 382 | mutex_unlock(&smp_alt); |
401 | } | 383 | } |
402 | 384 | ||
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) | |||
404 | { | 386 | { |
405 | struct smp_alt_module *item; | 387 | struct smp_alt_module *item; |
406 | 388 | ||
407 | if (smp_alt_once || noreplace_smp) | ||
408 | return; | ||
409 | |||
410 | mutex_lock(&smp_alt); | 389 | mutex_lock(&smp_alt); |
411 | list_for_each_entry(item, &smp_alt_modules, next) { | 390 | list_for_each_entry(item, &smp_alt_modules, next) { |
412 | if (mod != item->mod) | 391 | if (mod != item->mod) |
413 | continue; | 392 | continue; |
414 | list_del(&item->next); | 393 | list_del(&item->next); |
415 | mutex_unlock(&smp_alt); | ||
416 | DPRINTK("%s: %s\n", __func__, item->name); | ||
417 | kfree(item); | 394 | kfree(item); |
418 | return; | 395 | break; |
419 | } | 396 | } |
420 | mutex_unlock(&smp_alt); | 397 | mutex_unlock(&smp_alt); |
421 | } | 398 | } |
422 | 399 | ||
423 | bool skip_smp_alternatives; | 400 | void alternatives_enable_smp(void) |
424 | void alternatives_smp_switch(int smp) | ||
425 | { | 401 | { |
426 | struct smp_alt_module *mod; | 402 | struct smp_alt_module *mod; |
427 | 403 | ||
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) | |||
436 | pr_info("lockdep: fixing up alternatives\n"); | 412 | pr_info("lockdep: fixing up alternatives\n"); |
437 | #endif | 413 | #endif |
438 | 414 | ||
439 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) | 415 | /* Why bother if there are no other CPUs? */ |
440 | return; | 416 | BUG_ON(num_possible_cpus() == 1); |
441 | BUG_ON(!smp && (num_online_cpus() > 1)); | ||
442 | 417 | ||
443 | mutex_lock(&smp_alt); | 418 | mutex_lock(&smp_alt); |
444 | 419 | ||
445 | /* | 420 | if (uniproc_patched) { |
446 | * Avoid unnecessary switches because it forces JIT based VMs to | ||
447 | * throw away all cached translations, which can be quite costly. | ||
448 | */ | ||
449 | if (smp == smp_mode) { | ||
450 | /* nothing */ | ||
451 | } else if (smp) { | ||
452 | pr_info("switching to SMP code\n"); | 421 | pr_info("switching to SMP code\n"); |
422 | BUG_ON(num_online_cpus() != 1); | ||
453 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 423 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
454 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 424 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
455 | list_for_each_entry(mod, &smp_alt_modules, next) | 425 | list_for_each_entry(mod, &smp_alt_modules, next) |
456 | alternatives_smp_lock(mod->locks, mod->locks_end, | 426 | alternatives_smp_lock(mod->locks, mod->locks_end, |
457 | mod->text, mod->text_end); | 427 | mod->text, mod->text_end); |
458 | } else { | 428 | uniproc_patched = false; |
459 | pr_info("switching to UP code\n"); | ||
460 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | ||
461 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | ||
462 | list_for_each_entry(mod, &smp_alt_modules, next) | ||
463 | alternatives_smp_unlock(mod->locks, mod->locks_end, | ||
464 | mod->text, mod->text_end); | ||
465 | } | 429 | } |
466 | smp_mode = smp; | ||
467 | mutex_unlock(&smp_alt); | 430 | mutex_unlock(&smp_alt); |
468 | } | 431 | } |
469 | 432 | ||
@@ -540,40 +503,22 @@ void __init alternative_instructions(void) | |||
540 | 503 | ||
541 | apply_alternatives(__alt_instructions, __alt_instructions_end); | 504 | apply_alternatives(__alt_instructions, __alt_instructions_end); |
542 | 505 | ||
543 | /* switch to patch-once-at-boottime-only mode and free the | ||
544 | * tables in case we know the number of CPUs will never ever | ||
545 | * change */ | ||
546 | #ifdef CONFIG_HOTPLUG_CPU | ||
547 | if (num_possible_cpus() < 2) | ||
548 | smp_alt_once = 1; | ||
549 | #endif | ||
550 | |||
551 | #ifdef CONFIG_SMP | 506 | #ifdef CONFIG_SMP |
552 | if (smp_alt_once) { | 507 | /* Patch to UP if other cpus not imminent. */ |
553 | if (1 == num_possible_cpus()) { | 508 | if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { |
554 | pr_info("switching to UP code\n"); | 509 | uniproc_patched = true; |
555 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | ||
556 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | ||
557 | |||
558 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, | ||
559 | _text, _etext); | ||
560 | } | ||
561 | } else { | ||
562 | alternatives_smp_module_add(NULL, "core kernel", | 510 | alternatives_smp_module_add(NULL, "core kernel", |
563 | __smp_locks, __smp_locks_end, | 511 | __smp_locks, __smp_locks_end, |
564 | _text, _etext); | 512 | _text, _etext); |
565 | |||
566 | /* Only switch to UP mode if we don't immediately boot others */ | ||
567 | if (num_present_cpus() == 1 || setup_max_cpus <= 1) | ||
568 | alternatives_smp_switch(0); | ||
569 | } | 513 | } |
570 | #endif | ||
571 | apply_paravirt(__parainstructions, __parainstructions_end); | ||
572 | 514 | ||
573 | if (smp_alt_once) | 515 | if (!uniproc_patched || num_possible_cpus() == 1) |
574 | free_init_pages("SMP alternatives", | 516 | free_init_pages("SMP alternatives", |
575 | (unsigned long)__smp_locks, | 517 | (unsigned long)__smp_locks, |
576 | (unsigned long)__smp_locks_end); | 518 | (unsigned long)__smp_locks_end); |
519 | #endif | ||
520 | |||
521 | apply_paravirt(__parainstructions, __parainstructions_end); | ||
577 | 522 | ||
578 | restart_nmi(); | 523 | restart_nmi(); |
579 | } | 524 | } |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb3082328..b17416e72fbd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1934 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); | 1934 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); |
1935 | i++; | 1935 | i++; |
1936 | v1 >>= 1; | 1936 | v1 >>= 1; |
1937 | }; | 1937 | } |
1938 | 1938 | ||
1939 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); | 1939 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); |
1940 | 1940 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19039f0..f7e98a2c0d12 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, | |||
737 | } | 737 | } |
738 | #endif | 738 | #endif |
739 | 739 | ||
740 | static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) | ||
741 | { | ||
742 | if (!cpu_has_invlpg) | ||
743 | return; | ||
744 | |||
745 | tlb_flushall_shift = 5; | ||
746 | |||
747 | if (c->x86 <= 0x11) | ||
748 | tlb_flushall_shift = 4; | ||
749 | } | ||
750 | |||
751 | static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) | ||
752 | { | ||
753 | u32 ebx, eax, ecx, edx; | ||
754 | u16 mask = 0xfff; | ||
755 | |||
756 | if (c->x86 < 0xf) | ||
757 | return; | ||
758 | |||
759 | if (c->extended_cpuid_level < 0x80000006) | ||
760 | return; | ||
761 | |||
762 | cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | ||
763 | |||
764 | tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; | ||
765 | tlb_lli_4k[ENTRIES] = ebx & mask; | ||
766 | |||
767 | /* | ||
768 | * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB | ||
769 | * characteristics from the CPUID function 0x80000005 instead. | ||
770 | */ | ||
771 | if (c->x86 == 0xf) { | ||
772 | cpuid(0x80000005, &eax, &ebx, &ecx, &edx); | ||
773 | mask = 0xff; | ||
774 | } | ||
775 | |||
776 | /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ | ||
777 | if (!((eax >> 16) & mask)) { | ||
778 | u32 a, b, c, d; | ||
779 | |||
780 | cpuid(0x80000005, &a, &b, &c, &d); | ||
781 | tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; | ||
782 | } else { | ||
783 | tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; | ||
784 | } | ||
785 | |||
786 | /* a 4M entry uses two 2M entries */ | ||
787 | tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; | ||
788 | |||
789 | /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ | ||
790 | if (!(eax & mask)) { | ||
791 | /* Erratum 658 */ | ||
792 | if (c->x86 == 0x15 && c->x86_model <= 0x1f) { | ||
793 | tlb_lli_2m[ENTRIES] = 1024; | ||
794 | } else { | ||
795 | cpuid(0x80000005, &eax, &ebx, &ecx, &edx); | ||
796 | tlb_lli_2m[ENTRIES] = eax & 0xff; | ||
797 | } | ||
798 | } else | ||
799 | tlb_lli_2m[ENTRIES] = eax & mask; | ||
800 | |||
801 | tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; | ||
802 | |||
803 | cpu_set_tlb_flushall_shift(c); | ||
804 | } | ||
805 | |||
740 | static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | 806 | static const struct cpu_dev __cpuinitconst amd_cpu_dev = { |
741 | .c_vendor = "AMD", | 807 | .c_vendor = "AMD", |
742 | .c_ident = { "AuthenticAMD" }, | 808 | .c_ident = { "AuthenticAMD" }, |
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | |||
756 | .c_size_cache = amd_size_cache, | 822 | .c_size_cache = amd_size_cache, |
757 | #endif | 823 | #endif |
758 | .c_early_init = early_init_amd, | 824 | .c_early_init = early_init_amd, |
825 | .c_detect_tlb = cpu_detect_tlb_amd, | ||
759 | .c_bsp_init = bsp_init_amd, | 826 | .c_bsp_init = bsp_init_amd, |
760 | .c_init = init_amd, | 827 | .c_init = init_amd, |
761 | .c_x86_vendor = X86_VENDOR_AMD, | 828 | .c_x86_vendor = X86_VENDOR_AMD, |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void) | |||
165 | print_cpu_info(&boot_cpu_data); | 165 | print_cpu_info(&boot_cpu_data); |
166 | #endif | 166 | #endif |
167 | check_config(); | 167 | check_config(); |
168 | check_fpu(); | ||
169 | check_hlt(); | 168 | check_hlt(); |
170 | check_popad(); | 169 | check_popad(); |
171 | init_utsname()->machine[1] = | 170 | init_utsname()->machine[1] = |
172 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | 171 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); |
173 | alternative_instructions(); | 172 | alternative_instructions(); |
173 | |||
174 | /* | ||
175 | * kernel_fpu_begin/end() in check_fpu() relies on the patched | ||
176 | * alternative instructions. | ||
177 | */ | ||
178 | check_fpu(); | ||
174 | } | 179 | } |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..7505f7b13e71 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |||
259 | } | 259 | } |
260 | #endif | 260 | #endif |
261 | 261 | ||
262 | static int disable_smep __cpuinitdata; | ||
263 | static __init int setup_disable_smep(char *arg) | 262 | static __init int setup_disable_smep(char *arg) |
264 | { | 263 | { |
265 | disable_smep = 1; | 264 | setup_clear_cpu_cap(X86_FEATURE_SMEP); |
266 | return 1; | 265 | return 1; |
267 | } | 266 | } |
268 | __setup("nosmep", setup_disable_smep); | 267 | __setup("nosmep", setup_disable_smep); |
269 | 268 | ||
270 | static __cpuinit void setup_smep(struct cpuinfo_x86 *c) | 269 | static __always_inline void setup_smep(struct cpuinfo_x86 *c) |
271 | { | 270 | { |
272 | if (cpu_has(c, X86_FEATURE_SMEP)) { | 271 | if (cpu_has(c, X86_FEATURE_SMEP)) |
273 | if (unlikely(disable_smep)) { | 272 | set_in_cr4(X86_CR4_SMEP); |
274 | setup_clear_cpu_cap(X86_FEATURE_SMEP); | 273 | } |
275 | clear_in_cr4(X86_CR4_SMEP); | 274 | |
276 | } else | 275 | static __init int setup_disable_smap(char *arg) |
277 | set_in_cr4(X86_CR4_SMEP); | 276 | { |
278 | } | 277 | setup_clear_cpu_cap(X86_FEATURE_SMAP); |
278 | return 1; | ||
279 | } | ||
280 | __setup("nosmap", setup_disable_smap); | ||
281 | |||
282 | static __always_inline void setup_smap(struct cpuinfo_x86 *c) | ||
283 | { | ||
284 | unsigned long eflags; | ||
285 | |||
286 | /* This should have been cleared long ago */ | ||
287 | raw_local_save_flags(eflags); | ||
288 | BUG_ON(eflags & X86_EFLAGS_AC); | ||
289 | |||
290 | if (cpu_has(c, X86_FEATURE_SMAP)) | ||
291 | set_in_cr4(X86_CR4_SMAP); | ||
279 | } | 292 | } |
280 | 293 | ||
281 | /* | 294 | /* |
@@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) | |||
476 | 489 | ||
477 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 490 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ |
478 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 491 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ |
479 | "tlb_flushall_shift is 0x%x\n", | 492 | "tlb_flushall_shift: %d\n", |
480 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | 493 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
481 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | 494 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], |
482 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | 495 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], |
@@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
712 | c->cpu_index = 0; | 725 | c->cpu_index = 0; |
713 | filter_cpuid_features(c, false); | 726 | filter_cpuid_features(c, false); |
714 | 727 | ||
715 | setup_smep(c); | ||
716 | |||
717 | if (this_cpu->c_bsp_init) | 728 | if (this_cpu->c_bsp_init) |
718 | this_cpu->c_bsp_init(c); | 729 | this_cpu->c_bsp_init(c); |
719 | } | 730 | } |
@@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
798 | c->phys_proc_id = c->initial_apicid; | 809 | c->phys_proc_id = c->initial_apicid; |
799 | } | 810 | } |
800 | 811 | ||
801 | setup_smep(c); | ||
802 | |||
803 | get_model_name(c); /* Default name */ | 812 | get_model_name(c); /* Default name */ |
804 | 813 | ||
805 | detect_nopl(c); | 814 | detect_nopl(c); |
@@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
864 | /* Disable the PN if appropriate */ | 873 | /* Disable the PN if appropriate */ |
865 | squash_the_stupid_serial_number(c); | 874 | squash_the_stupid_serial_number(c); |
866 | 875 | ||
876 | /* Set up SMEP/SMAP */ | ||
877 | setup_smep(c); | ||
878 | setup_smap(c); | ||
879 | |||
867 | /* | 880 | /* |
868 | * The vendor-specific functions might have changed features. | 881 | * The vendor-specific functions might have changed features. |
869 | * Now we do "generic changes." | 882 | * Now we do "generic changes." |
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void) | |||
942 | #else | 955 | #else |
943 | vgetcpu_set_mode(); | 956 | vgetcpu_set_mode(); |
944 | #endif | 957 | #endif |
945 | if (boot_cpu_data.cpuid_level >= 2) | 958 | cpu_detect_tlb(&boot_cpu_data); |
946 | cpu_detect_tlb(&boot_cpu_data); | ||
947 | } | 959 | } |
948 | 960 | ||
949 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 961 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
@@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |||
1023 | printk(KERN_CONT "%s ", vendor); | 1035 | printk(KERN_CONT "%s ", vendor); |
1024 | 1036 | ||
1025 | if (c->x86_model_id[0]) | 1037 | if (c->x86_model_id[0]) |
1026 | printk(KERN_CONT "%s", c->x86_model_id); | 1038 | printk(KERN_CONT "%s", strim(c->x86_model_id)); |
1027 | else | 1039 | else |
1028 | printk(KERN_CONT "%d86", c->x86); | 1040 | printk(KERN_CONT "%d86", c->x86); |
1029 | 1041 | ||
1042 | printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); | ||
1043 | |||
1030 | if (c->x86_mask || c->cpuid_level >= 0) | 1044 | if (c->x86_mask || c->cpuid_level >= 0) |
1031 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); | 1045 | printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); |
1032 | else | 1046 | else |
1033 | printk(KERN_CONT "\n"); | 1047 | printk(KERN_CONT ")\n"); |
1034 | 1048 | ||
1035 | print_cpu_msr(c); | 1049 | print_cpu_msr(c); |
1036 | } | 1050 | } |
@@ -1113,11 +1127,10 @@ void syscall_init(void) | |||
1113 | 1127 | ||
1114 | /* Flags to clear on syscall */ | 1128 | /* Flags to clear on syscall */ |
1115 | wrmsrl(MSR_SYSCALL_MASK, | 1129 | wrmsrl(MSR_SYSCALL_MASK, |
1116 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | 1130 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| |
1131 | X86_EFLAGS_IOPL|X86_EFLAGS_AC); | ||
1117 | } | 1132 | } |
1118 | 1133 | ||
1119 | unsigned long kernel_eflags; | ||
1120 | |||
1121 | /* | 1134 | /* |
1122 | * Copies of the original ist values from the tss are only accessed during | 1135 | * Copies of the original ist values from the tss are only accessed during |
1123 | * debugging, no special alignment required. | 1136 | * debugging, no special alignment required. |
@@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void) | |||
1297 | dbg_restore_debug_regs(); | 1310 | dbg_restore_debug_regs(); |
1298 | 1311 | ||
1299 | fpu_init(); | 1312 | fpu_init(); |
1300 | xsave_init(); | ||
1301 | |||
1302 | raw_local_save_flags(kernel_eflags); | ||
1303 | 1313 | ||
1304 | if (is_uv_system()) | 1314 | if (is_uv_system()) |
1305 | uv_cpu_init(); | 1315 | uv_cpu_init(); |
@@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void) | |||
1352 | dbg_restore_debug_regs(); | 1362 | dbg_restore_debug_regs(); |
1353 | 1363 | ||
1354 | fpu_init(); | 1364 | fpu_init(); |
1355 | xsave_init(); | ||
1356 | } | 1365 | } |
1357 | #endif | 1366 | #endif |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce2980a5a..198e019a531a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) | |||
648 | int i, j, n; | 648 | int i, j, n; |
649 | unsigned int regs[4]; | 649 | unsigned int regs[4]; |
650 | unsigned char *desc = (unsigned char *)regs; | 650 | unsigned char *desc = (unsigned char *)regs; |
651 | |||
652 | if (c->cpuid_level < 2) | ||
653 | return; | ||
654 | |||
651 | /* Number of times to iterate */ | 655 | /* Number of times to iterate */ |
652 | n = cpuid_eax(2) & 0xFF; | 656 | n = cpuid_eax(2) & 0xFF; |
653 | 657 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) | |||
78 | } | 78 | } |
79 | 79 | ||
80 | static cpumask_var_t mce_inject_cpumask; | 80 | static cpumask_var_t mce_inject_cpumask; |
81 | static DEFINE_MUTEX(mce_inject_mutex); | ||
81 | 82 | ||
82 | static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) | 83 | static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) |
83 | { | 84 | { |
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m) | |||
194 | put_online_cpus(); | 195 | put_online_cpus(); |
195 | } else | 196 | } else |
196 | #endif | 197 | #endif |
198 | { | ||
199 | preempt_disable(); | ||
197 | raise_local(); | 200 | raise_local(); |
201 | preempt_enable(); | ||
202 | } | ||
198 | } | 203 | } |
199 | 204 | ||
200 | /* Error injection interface */ | 205 | /* Error injection interface */ |
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, | |||
225 | * so do it a jiffie or two later everywhere. | 230 | * so do it a jiffie or two later everywhere. |
226 | */ | 231 | */ |
227 | schedule_timeout(2); | 232 | schedule_timeout(2); |
233 | |||
234 | mutex_lock(&mce_inject_mutex); | ||
228 | raise_mce(&m); | 235 | raise_mce(&m); |
236 | mutex_unlock(&mce_inject_mutex); | ||
229 | return usize; | 237 | return usize; |
230 | } | 238 | } |
231 | 239 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser; | |||
28 | 28 | ||
29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
30 | 30 | ||
31 | #ifdef CONFIG_X86_MCE_INTEL | ||
32 | unsigned long mce_intel_adjust_timer(unsigned long interval); | ||
33 | void mce_intel_cmci_poll(void); | ||
34 | void mce_intel_hcpu_update(unsigned long cpu); | ||
35 | #else | ||
36 | # define mce_intel_adjust_timer mce_adjust_timer_default | ||
37 | static inline void mce_intel_cmci_poll(void) { } | ||
38 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } | ||
39 | #endif | ||
40 | |||
41 | void mce_timer_kick(unsigned long interval); | ||
42 | |||
31 | #ifdef CONFIG_ACPI_APEI | 43 | #ifdef CONFIG_ACPI_APEI |
32 | int apei_write_mce(struct mce *m); | 44 | int apei_write_mce(struct mce *m); |
33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | 45 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly; | |||
83 | int mce_cmci_disabled __read_mostly; | 83 | int mce_cmci_disabled __read_mostly; |
84 | int mce_ignore_ce __read_mostly; | 84 | int mce_ignore_ce __read_mostly; |
85 | int mce_ser __read_mostly; | 85 | int mce_ser __read_mostly; |
86 | int mce_bios_cmci_threshold __read_mostly; | ||
86 | 87 | ||
87 | struct mce_bank *mce_banks __read_mostly; | 88 | struct mce_bank *mce_banks __read_mostly; |
88 | 89 | ||
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */ | |||
1266 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | 1267 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ |
1267 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1268 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1268 | 1269 | ||
1270 | static unsigned long mce_adjust_timer_default(unsigned long interval) | ||
1271 | { | ||
1272 | return interval; | ||
1273 | } | ||
1274 | |||
1275 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = | ||
1276 | mce_adjust_timer_default; | ||
1277 | |||
1269 | static void mce_timer_fn(unsigned long data) | 1278 | static void mce_timer_fn(unsigned long data) |
1270 | { | 1279 | { |
1271 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1280 | struct timer_list *t = &__get_cpu_var(mce_timer); |
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data) | |||
1276 | if (mce_available(__this_cpu_ptr(&cpu_info))) { | 1285 | if (mce_available(__this_cpu_ptr(&cpu_info))) { |
1277 | machine_check_poll(MCP_TIMESTAMP, | 1286 | machine_check_poll(MCP_TIMESTAMP, |
1278 | &__get_cpu_var(mce_poll_banks)); | 1287 | &__get_cpu_var(mce_poll_banks)); |
1288 | mce_intel_cmci_poll(); | ||
1279 | } | 1289 | } |
1280 | 1290 | ||
1281 | /* | 1291 | /* |
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data) | |||
1283 | * polling interval, otherwise increase the polling interval. | 1293 | * polling interval, otherwise increase the polling interval. |
1284 | */ | 1294 | */ |
1285 | iv = __this_cpu_read(mce_next_interval); | 1295 | iv = __this_cpu_read(mce_next_interval); |
1286 | if (mce_notify_irq()) | 1296 | if (mce_notify_irq()) { |
1287 | iv = max(iv / 2, (unsigned long) HZ/100); | 1297 | iv = max(iv / 2, (unsigned long) HZ/100); |
1288 | else | 1298 | } else { |
1289 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); | 1299 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); |
1300 | iv = mce_adjust_timer(iv); | ||
1301 | } | ||
1290 | __this_cpu_write(mce_next_interval, iv); | 1302 | __this_cpu_write(mce_next_interval, iv); |
1303 | /* Might have become 0 after CMCI storm subsided */ | ||
1304 | if (iv) { | ||
1305 | t->expires = jiffies + iv; | ||
1306 | add_timer_on(t, smp_processor_id()); | ||
1307 | } | ||
1308 | } | ||
1291 | 1309 | ||
1292 | t->expires = jiffies + iv; | 1310 | /* |
1293 | add_timer_on(t, smp_processor_id()); | 1311 | * Ensure that the timer is firing in @interval from now. |
1312 | */ | ||
1313 | void mce_timer_kick(unsigned long interval) | ||
1314 | { | ||
1315 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
1316 | unsigned long when = jiffies + interval; | ||
1317 | unsigned long iv = __this_cpu_read(mce_next_interval); | ||
1318 | |||
1319 | if (timer_pending(t)) { | ||
1320 | if (time_before(when, t->expires)) | ||
1321 | mod_timer_pinned(t, when); | ||
1322 | } else { | ||
1323 | t->expires = round_jiffies(when); | ||
1324 | add_timer_on(t, smp_processor_id()); | ||
1325 | } | ||
1326 | if (interval < iv) | ||
1327 | __this_cpu_write(mce_next_interval, interval); | ||
1294 | } | 1328 | } |
1295 | 1329 | ||
1296 | /* Must not be called in IRQ context where del_timer_sync() can deadlock */ | 1330 | /* Must not be called in IRQ context where del_timer_sync() can deadlock */ |
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1585 | switch (c->x86_vendor) { | 1619 | switch (c->x86_vendor) { |
1586 | case X86_VENDOR_INTEL: | 1620 | case X86_VENDOR_INTEL: |
1587 | mce_intel_feature_init(c); | 1621 | mce_intel_feature_init(c); |
1622 | mce_adjust_timer = mce_intel_adjust_timer; | ||
1588 | break; | 1623 | break; |
1589 | case X86_VENDOR_AMD: | 1624 | case X86_VENDOR_AMD: |
1590 | mce_amd_feature_init(c); | 1625 | mce_amd_feature_init(c); |
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1594 | } | 1629 | } |
1595 | } | 1630 | } |
1596 | 1631 | ||
1597 | static void __mcheck_cpu_init_timer(void) | 1632 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) |
1598 | { | 1633 | { |
1599 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1634 | unsigned long iv = mce_adjust_timer(check_interval * HZ); |
1600 | unsigned long iv = check_interval * HZ; | ||
1601 | 1635 | ||
1602 | setup_timer(t, mce_timer_fn, smp_processor_id()); | 1636 | __this_cpu_write(mce_next_interval, iv); |
1603 | 1637 | ||
1604 | if (mce_ignore_ce) | 1638 | if (mce_ignore_ce || !iv) |
1605 | return; | 1639 | return; |
1606 | 1640 | ||
1607 | __this_cpu_write(mce_next_interval, iv); | ||
1608 | if (!iv) | ||
1609 | return; | ||
1610 | t->expires = round_jiffies(jiffies + iv); | 1641 | t->expires = round_jiffies(jiffies + iv); |
1611 | add_timer_on(t, smp_processor_id()); | 1642 | add_timer_on(t, smp_processor_id()); |
1612 | } | 1643 | } |
1613 | 1644 | ||
1645 | static void __mcheck_cpu_init_timer(void) | ||
1646 | { | ||
1647 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
1648 | unsigned int cpu = smp_processor_id(); | ||
1649 | |||
1650 | setup_timer(t, mce_timer_fn, cpu); | ||
1651 | mce_start_timer(cpu, t); | ||
1652 | } | ||
1653 | |||
1614 | /* Handle unconfigured int18 (should never happen) */ | 1654 | /* Handle unconfigured int18 (should never happen) */ |
1615 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 1655 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
1616 | { | 1656 | { |
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = { | |||
1907 | * check, or 0 to not wait | 1947 | * check, or 0 to not wait |
1908 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | 1948 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
1909 | * mce=nobootlog Don't log MCEs from before booting. | 1949 | * mce=nobootlog Don't log MCEs from before booting. |
1950 | * mce=bios_cmci_threshold Don't program the CMCI threshold | ||
1910 | */ | 1951 | */ |
1911 | static int __init mcheck_enable(char *str) | 1952 | static int __init mcheck_enable(char *str) |
1912 | { | 1953 | { |
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str) | |||
1926 | mce_ignore_ce = 1; | 1967 | mce_ignore_ce = 1; |
1927 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | 1968 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) |
1928 | mce_bootlog = (str[0] == 'b'); | 1969 | mce_bootlog = (str[0] == 'b'); |
1970 | else if (!strcmp(str, "bios_cmci_threshold")) | ||
1971 | mce_bios_cmci_threshold = 1; | ||
1929 | else if (isdigit(str[0])) { | 1972 | else if (isdigit(str[0])) { |
1930 | get_option(&str, &tolerant); | 1973 | get_option(&str, &tolerant); |
1931 | if (*str == ',') { | 1974 | if (*str == ',') { |
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = { | |||
2166 | &mce_cmci_disabled | 2209 | &mce_cmci_disabled |
2167 | }; | 2210 | }; |
2168 | 2211 | ||
2212 | static struct dev_ext_attribute dev_attr_bios_cmci_threshold = { | ||
2213 | __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL), | ||
2214 | &mce_bios_cmci_threshold | ||
2215 | }; | ||
2216 | |||
2169 | static struct device_attribute *mce_device_attrs[] = { | 2217 | static struct device_attribute *mce_device_attrs[] = { |
2170 | &dev_attr_tolerant.attr, | 2218 | &dev_attr_tolerant.attr, |
2171 | &dev_attr_check_interval.attr, | 2219 | &dev_attr_check_interval.attr, |
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = { | |||
2174 | &dev_attr_dont_log_ce.attr, | 2222 | &dev_attr_dont_log_ce.attr, |
2175 | &dev_attr_ignore_ce.attr, | 2223 | &dev_attr_ignore_ce.attr, |
2176 | &dev_attr_cmci_disabled.attr, | 2224 | &dev_attr_cmci_disabled.attr, |
2225 | &dev_attr_bios_cmci_threshold.attr, | ||
2177 | NULL | 2226 | NULL |
2178 | }; | 2227 | }; |
2179 | 2228 | ||
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2294 | unsigned int cpu = (unsigned long)hcpu; | 2343 | unsigned int cpu = (unsigned long)hcpu; |
2295 | struct timer_list *t = &per_cpu(mce_timer, cpu); | 2344 | struct timer_list *t = &per_cpu(mce_timer, cpu); |
2296 | 2345 | ||
2297 | switch (action) { | 2346 | switch (action & ~CPU_TASKS_FROZEN) { |
2298 | case CPU_ONLINE: | 2347 | case CPU_ONLINE: |
2299 | case CPU_ONLINE_FROZEN: | ||
2300 | mce_device_create(cpu); | 2348 | mce_device_create(cpu); |
2301 | if (threshold_cpu_callback) | 2349 | if (threshold_cpu_callback) |
2302 | threshold_cpu_callback(action, cpu); | 2350 | threshold_cpu_callback(action, cpu); |
2303 | break; | 2351 | break; |
2304 | case CPU_DEAD: | 2352 | case CPU_DEAD: |
2305 | case CPU_DEAD_FROZEN: | ||
2306 | if (threshold_cpu_callback) | 2353 | if (threshold_cpu_callback) |
2307 | threshold_cpu_callback(action, cpu); | 2354 | threshold_cpu_callback(action, cpu); |
2308 | mce_device_remove(cpu); | 2355 | mce_device_remove(cpu); |
2356 | mce_intel_hcpu_update(cpu); | ||
2309 | break; | 2357 | break; |
2310 | case CPU_DOWN_PREPARE: | 2358 | case CPU_DOWN_PREPARE: |
2311 | case CPU_DOWN_PREPARE_FROZEN: | ||
2312 | del_timer_sync(t); | ||
2313 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | 2359 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); |
2360 | del_timer_sync(t); | ||
2314 | break; | 2361 | break; |
2315 | case CPU_DOWN_FAILED: | 2362 | case CPU_DOWN_FAILED: |
2316 | case CPU_DOWN_FAILED_FROZEN: | ||
2317 | if (!mce_ignore_ce && check_interval) { | ||
2318 | t->expires = round_jiffies(jiffies + | ||
2319 | per_cpu(mce_next_interval, cpu)); | ||
2320 | add_timer_on(t, cpu); | ||
2321 | } | ||
2322 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 2363 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
2364 | mce_start_timer(cpu, t); | ||
2323 | break; | 2365 | break; |
2324 | case CPU_POST_DEAD: | 2366 | } |
2367 | |||
2368 | if (action == CPU_POST_DEAD) { | ||
2325 | /* intentionally ignoring frozen here */ | 2369 | /* intentionally ignoring frozen here */ |
2326 | cmci_rediscover(cpu); | 2370 | cmci_rediscover(cpu); |
2327 | break; | ||
2328 | } | 2371 | } |
2372 | |||
2329 | return NOTIFY_OK; | 2373 | return NOTIFY_OK; |
2330 | } | 2374 | } |
2331 | 2375 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffc..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@ | |||
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
16 | #include <asm/mce.h> | 16 | #include <asm/mce.h> |
17 | 17 | ||
18 | #include "mce-internal.h" | ||
19 | |||
18 | /* | 20 | /* |
19 | * Support for Intel Correct Machine Check Interrupts. This allows | 21 | * Support for Intel Correct Machine Check Interrupts. This allows |
20 | * the CPU to raise an interrupt when a corrected machine check happened. | 22 | * the CPU to raise an interrupt when a corrected machine check happened. |
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | |||
30 | */ | 32 | */ |
31 | static DEFINE_RAW_SPINLOCK(cmci_discover_lock); | 33 | static DEFINE_RAW_SPINLOCK(cmci_discover_lock); |
32 | 34 | ||
33 | #define CMCI_THRESHOLD 1 | 35 | #define CMCI_THRESHOLD 1 |
36 | #define CMCI_POLL_INTERVAL (30 * HZ) | ||
37 | #define CMCI_STORM_INTERVAL (1 * HZ) | ||
38 | #define CMCI_STORM_THRESHOLD 15 | ||
39 | |||
40 | static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); | ||
41 | static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); | ||
42 | static DEFINE_PER_CPU(unsigned int, cmci_storm_state); | ||
43 | |||
44 | enum { | ||
45 | CMCI_STORM_NONE, | ||
46 | CMCI_STORM_ACTIVE, | ||
47 | CMCI_STORM_SUBSIDED, | ||
48 | }; | ||
49 | |||
50 | static atomic_t cmci_storm_on_cpus; | ||
34 | 51 | ||
35 | static int cmci_supported(int *banks) | 52 | static int cmci_supported(int *banks) |
36 | { | 53 | { |
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks) | |||
53 | return !!(cap & MCG_CMCI_P); | 70 | return !!(cap & MCG_CMCI_P); |
54 | } | 71 | } |
55 | 72 | ||
73 | void mce_intel_cmci_poll(void) | ||
74 | { | ||
75 | if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) | ||
76 | return; | ||
77 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
78 | } | ||
79 | |||
80 | void mce_intel_hcpu_update(unsigned long cpu) | ||
81 | { | ||
82 | if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) | ||
83 | atomic_dec(&cmci_storm_on_cpus); | ||
84 | |||
85 | per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; | ||
86 | } | ||
87 | |||
88 | unsigned long mce_intel_adjust_timer(unsigned long interval) | ||
89 | { | ||
90 | int r; | ||
91 | |||
92 | if (interval < CMCI_POLL_INTERVAL) | ||
93 | return interval; | ||
94 | |||
95 | switch (__this_cpu_read(cmci_storm_state)) { | ||
96 | case CMCI_STORM_ACTIVE: | ||
97 | /* | ||
98 | * We switch back to interrupt mode once the poll timer has | ||
99 | * silenced itself. That means no events recorded and the | ||
100 | * timer interval is back to our poll interval. | ||
101 | */ | ||
102 | __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); | ||
103 | r = atomic_sub_return(1, &cmci_storm_on_cpus); | ||
104 | if (r == 0) | ||
105 | pr_notice("CMCI storm subsided: switching to interrupt mode\n"); | ||
106 | /* FALLTHROUGH */ | ||
107 | |||
108 | case CMCI_STORM_SUBSIDED: | ||
109 | /* | ||
110 | * We wait for all cpus to go back to SUBSIDED | ||
111 | * state. When that happens we switch back to | ||
112 | * interrupt mode. | ||
113 | */ | ||
114 | if (!atomic_read(&cmci_storm_on_cpus)) { | ||
115 | __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); | ||
116 | cmci_reenable(); | ||
117 | cmci_recheck(); | ||
118 | } | ||
119 | return CMCI_POLL_INTERVAL; | ||
120 | default: | ||
121 | /* | ||
122 | * We have shiny weather. Let the poll do whatever it | ||
123 | * thinks. | ||
124 | */ | ||
125 | return interval; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static bool cmci_storm_detect(void) | ||
130 | { | ||
131 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); | ||
132 | unsigned long ts = __this_cpu_read(cmci_time_stamp); | ||
133 | unsigned long now = jiffies; | ||
134 | int r; | ||
135 | |||
136 | if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) | ||
137 | return true; | ||
138 | |||
139 | if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { | ||
140 | cnt++; | ||
141 | } else { | ||
142 | cnt = 1; | ||
143 | __this_cpu_write(cmci_time_stamp, now); | ||
144 | } | ||
145 | __this_cpu_write(cmci_storm_cnt, cnt); | ||
146 | |||
147 | if (cnt <= CMCI_STORM_THRESHOLD) | ||
148 | return false; | ||
149 | |||
150 | cmci_clear(); | ||
151 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); | ||
152 | r = atomic_add_return(1, &cmci_storm_on_cpus); | ||
153 | mce_timer_kick(CMCI_POLL_INTERVAL); | ||
154 | |||
155 | if (r == 1) | ||
156 | pr_notice("CMCI storm detected: switching to poll mode\n"); | ||
157 | return true; | ||
158 | } | ||
159 | |||
56 | /* | 160 | /* |
57 | * The interrupt handler. This is called on every event. | 161 | * The interrupt handler. This is called on every event. |
58 | * Just call the poller directly to log any events. | 162 | * Just call the poller directly to log any events. |
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks) | |||
61 | */ | 165 | */ |
62 | static void intel_threshold_interrupt(void) | 166 | static void intel_threshold_interrupt(void) |
63 | { | 167 | { |
168 | if (cmci_storm_detect()) | ||
169 | return; | ||
64 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | 170 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
65 | mce_notify_irq(); | 171 | mce_notify_irq(); |
66 | } | 172 | } |
67 | 173 | ||
68 | static void print_update(char *type, int *hdr, int num) | ||
69 | { | ||
70 | if (*hdr == 0) | ||
71 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
72 | *hdr = 1; | ||
73 | printk(KERN_CONT " %s:%d", type, num); | ||
74 | } | ||
75 | |||
76 | /* | 174 | /* |
77 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | 175 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks |
78 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | 176 | * on this CPU. Use the algorithm recommended in the SDM to discover shared |
79 | * banks. | 177 | * banks. |
80 | */ | 178 | */ |
81 | static void cmci_discover(int banks, int boot) | 179 | static void cmci_discover(int banks) |
82 | { | 180 | { |
83 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | 181 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
84 | unsigned long flags; | 182 | unsigned long flags; |
85 | int hdr = 0; | ||
86 | int i; | 183 | int i; |
184 | int bios_wrong_thresh = 0; | ||
87 | 185 | ||
88 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | 186 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
89 | for (i = 0; i < banks; i++) { | 187 | for (i = 0; i < banks; i++) { |
90 | u64 val; | 188 | u64 val; |
189 | int bios_zero_thresh = 0; | ||
91 | 190 | ||
92 | if (test_bit(i, owned)) | 191 | if (test_bit(i, owned)) |
93 | continue; | 192 | continue; |
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot) | |||
96 | 195 | ||
97 | /* Already owned by someone else? */ | 196 | /* Already owned by someone else? */ |
98 | if (val & MCI_CTL2_CMCI_EN) { | 197 | if (val & MCI_CTL2_CMCI_EN) { |
99 | if (test_and_clear_bit(i, owned) && !boot) | 198 | clear_bit(i, owned); |
100 | print_update("SHD", &hdr, i); | ||
101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 199 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
102 | continue; | 200 | continue; |
103 | } | 201 | } |
104 | 202 | ||
105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; | 203 | if (!mce_bios_cmci_threshold) { |
106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | 204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
205 | val |= CMCI_THRESHOLD; | ||
206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { | ||
207 | /* | ||
208 | * If bios_cmci_threshold boot option was specified | ||
209 | * but the threshold is zero, we'll try to initialize | ||
210 | * it to 1. | ||
211 | */ | ||
212 | bios_zero_thresh = 1; | ||
213 | val |= CMCI_THRESHOLD; | ||
214 | } | ||
215 | |||
216 | val |= MCI_CTL2_CMCI_EN; | ||
107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 217 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 218 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
109 | 219 | ||
110 | /* Did the enable bit stick? -- the bank supports CMCI */ | 220 | /* Did the enable bit stick? -- the bank supports CMCI */ |
111 | if (val & MCI_CTL2_CMCI_EN) { | 221 | if (val & MCI_CTL2_CMCI_EN) { |
112 | if (!test_and_set_bit(i, owned) && !boot) | 222 | set_bit(i, owned); |
113 | print_update("CMCI", &hdr, i); | ||
114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 223 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
224 | /* | ||
225 | * We are able to set thresholds for some banks that | ||
226 | * had a threshold of 0. This means the BIOS has not | ||
227 | * set the thresholds properly or does not work with | ||
228 | * this boot option. Note down now and report later. | ||
229 | */ | ||
230 | if (mce_bios_cmci_threshold && bios_zero_thresh && | ||
231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) | ||
232 | bios_wrong_thresh = 1; | ||
115 | } else { | 233 | } else { |
116 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | 234 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
117 | } | 235 | } |
118 | } | 236 | } |
119 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
120 | if (hdr) | 238 | if (mce_bios_cmci_threshold && bios_wrong_thresh) { |
121 | printk(KERN_CONT "\n"); | 239 | pr_info_once( |
240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); | ||
241 | pr_info_once( | ||
242 | "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); | ||
243 | } | ||
122 | } | 244 | } |
123 | 245 | ||
124 | /* | 246 | /* |
@@ -156,7 +278,7 @@ void cmci_clear(void) | |||
156 | continue; | 278 | continue; |
157 | /* Disable CMCI */ | 279 | /* Disable CMCI */ |
158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 280 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); | 281 | val &= ~MCI_CTL2_CMCI_EN; |
160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 282 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 283 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
162 | } | 284 | } |
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying) | |||
186 | continue; | 308 | continue; |
187 | /* Recheck banks in case CPUs don't all have the same */ | 309 | /* Recheck banks in case CPUs don't all have the same */ |
188 | if (cmci_supported(&banks)) | 310 | if (cmci_supported(&banks)) |
189 | cmci_discover(banks, 0); | 311 | cmci_discover(banks); |
190 | } | 312 | } |
191 | 313 | ||
192 | set_cpus_allowed_ptr(current, old); | 314 | set_cpus_allowed_ptr(current, old); |
@@ -200,7 +322,7 @@ void cmci_reenable(void) | |||
200 | { | 322 | { |
201 | int banks; | 323 | int banks; |
202 | if (cmci_supported(&banks)) | 324 | if (cmci_supported(&banks)) |
203 | cmci_discover(banks, 0); | 325 | cmci_discover(banks); |
204 | } | 326 | } |
205 | 327 | ||
206 | static void intel_init_cmci(void) | 328 | static void intel_init_cmci(void) |
@@ -211,7 +333,7 @@ static void intel_init_cmci(void) | |||
211 | return; | 333 | return; |
212 | 334 | ||
213 | mce_threshold_vector = intel_threshold_interrupt; | 335 | mce_threshold_vector = intel_threshold_interrupt; |
214 | cmci_discover(banks, 1); | 336 | cmci_discover(banks); |
215 | /* | 337 | /* |
216 | * For CPU #0 this runs with still disabled APIC, but that's | 338 | * For CPU #0 this runs with still disabled APIC, but that's |
217 | * ok because only the vector is set up. We still do another | 339 | * ok because only the vector is set up. We still do another |
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index c7b3fe2d72e0..091972ef49de 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -8,7 +8,10 @@ | |||
8 | open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; | 8 | open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; |
9 | open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; | 9 | open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; |
10 | 10 | ||
11 | print OUT "#include <asm/cpufeature.h>\n\n"; | 11 | print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n"; |
12 | print OUT "#include <asm/cpufeature.h>\n"; | ||
13 | print OUT "#endif\n"; | ||
14 | print OUT "\n"; | ||
12 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; | 15 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; |
13 | 16 | ||
14 | %features = (); | 17 | %features = (); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 38e4894165b9..99d96a4978b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1950,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp | |||
1950 | static struct intel_uncore_box * | 1950 | static struct intel_uncore_box * |
1951 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | 1951 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) |
1952 | { | 1952 | { |
1953 | static struct intel_uncore_box *box; | 1953 | struct intel_uncore_box *box; |
1954 | 1954 | ||
1955 | box = *per_cpu_ptr(pmu->box, cpu); | 1955 | box = *per_cpu_ptr(pmu->box, cpu); |
1956 | if (box) | 1956 | if (box) |
@@ -2347,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event) | |||
2347 | return ret; | 2347 | return ret; |
2348 | } | 2348 | } |
2349 | 2349 | ||
2350 | static ssize_t uncore_get_attr_cpumask(struct device *dev, | ||
2351 | struct device_attribute *attr, char *buf) | ||
2352 | { | ||
2353 | int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); | ||
2354 | |||
2355 | buf[n++] = '\n'; | ||
2356 | buf[n] = '\0'; | ||
2357 | return n; | ||
2358 | } | ||
2359 | |||
2360 | static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); | ||
2361 | |||
2362 | static struct attribute *uncore_pmu_attrs[] = { | ||
2363 | &dev_attr_cpumask.attr, | ||
2364 | NULL, | ||
2365 | }; | ||
2366 | |||
2367 | static struct attribute_group uncore_pmu_attr_group = { | ||
2368 | .attrs = uncore_pmu_attrs, | ||
2369 | }; | ||
2370 | |||
2350 | static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) | 2371 | static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) |
2351 | { | 2372 | { |
2352 | int ret; | 2373 | int ret; |
@@ -2384,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) | |||
2384 | free_percpu(type->pmus[i].box); | 2405 | free_percpu(type->pmus[i].box); |
2385 | kfree(type->pmus); | 2406 | kfree(type->pmus); |
2386 | type->pmus = NULL; | 2407 | type->pmus = NULL; |
2387 | kfree(type->attr_groups[1]); | 2408 | kfree(type->events_group); |
2388 | type->attr_groups[1] = NULL; | 2409 | type->events_group = NULL; |
2389 | } | 2410 | } |
2390 | 2411 | ||
2391 | static void __init uncore_types_exit(struct intel_uncore_type **types) | 2412 | static void __init uncore_types_exit(struct intel_uncore_type **types) |
@@ -2437,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) | |||
2437 | for (j = 0; j < i; j++) | 2458 | for (j = 0; j < i; j++) |
2438 | attrs[j] = &type->event_descs[j].attr.attr; | 2459 | attrs[j] = &type->event_descs[j].attr.attr; |
2439 | 2460 | ||
2440 | type->attr_groups[1] = events_group; | 2461 | type->events_group = events_group; |
2441 | } | 2462 | } |
2442 | 2463 | ||
2464 | type->pmu_group = &uncore_pmu_attr_group; | ||
2443 | type->pmus = pmus; | 2465 | type->pmus = pmus; |
2444 | return 0; | 2466 | return 0; |
2445 | fail: | 2467 | fail: |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c1856aac..e68a4550e952 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type { | |||
369 | struct intel_uncore_pmu *pmus; | 369 | struct intel_uncore_pmu *pmus; |
370 | struct intel_uncore_ops *ops; | 370 | struct intel_uncore_ops *ops; |
371 | struct uncore_event_desc *event_descs; | 371 | struct uncore_event_desc *event_descs; |
372 | const struct attribute_group *attr_groups[3]; | 372 | const struct attribute_group *attr_groups[4]; |
373 | }; | 373 | }; |
374 | 374 | ||
375 | #define format_group attr_groups[0] | 375 | #define pmu_group attr_groups[0] |
376 | #define format_group attr_groups[1] | ||
377 | #define events_group attr_groups[2] | ||
376 | 378 | ||
377 | struct intel_uncore_ops { | 379 | struct intel_uncore_ops { |
378 | void (*init_box)(struct intel_uncore_box *); | 380 | void (*init_box)(struct intel_uncore_box *); |
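The two hunks above work together: the .c change adds a read-only "cpumask" sysfs attribute grouped in uncore_pmu_attr_group, and the .h change grows attr_groups[] from three to four slots so that group gets its own named index while the array stays NULL-terminated. A minimal sketch of how such an array is meant to be wired into a PMU follows; the example_* names and the wiring helper are illustrative assumptions, not code from this diff.

#include <linux/perf_event.h>
#include <linux/sysfs.h>

struct example_uncore_type {
	/* slot 0: cpumask group, 1: format group, 2: event group, 3: NULL */
	const struct attribute_group *attr_groups[4];
};

#define example_pmu_group    attr_groups[0]
#define example_format_group attr_groups[1]
#define example_events_group attr_groups[2]

static void example_wire_groups(struct example_uncore_type *type,
				struct pmu *pmu,
				const struct attribute_group *cpumask_group)
{
	type->example_pmu_group = cpumask_group;
	/* perf walks the array until it hits NULL, hence the spare slot */
	pmu->attr_groups = type->attr_groups;
}

The resulting cpumask file under the uncore event source's sysfs directory lets tools such as perf pick the one CPU per package that should service the shared uncore counters.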
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c6681485..fbd895562292 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
140 | 140 | ||
141 | static void *c_start(struct seq_file *m, loff_t *pos) | 141 | static void *c_start(struct seq_file *m, loff_t *pos) |
142 | { | 142 | { |
143 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 143 | *pos = cpumask_next(*pos - 1, cpu_online_mask); |
144 | *pos = cpumask_first(cpu_online_mask); | ||
145 | else | ||
146 | *pos = cpumask_next(*pos - 1, cpu_online_mask); | ||
147 | if ((*pos) < nr_cpu_ids) | 144 | if ((*pos) < nr_cpu_ids) |
148 | return &cpu_data(*pos); | 145 | return &cpu_data(*pos); |
149 | return NULL; | 146 | return NULL; |
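The show_cpuinfo iterator above drops its *pos == 0 special case because the general expression already covers it: cpumask_next(n, mask) returns the first set bit strictly after n, so passing -1 (which is what *pos - 1 evaluates to when *pos == 0) yields the first online CPU. A tiny self-contained check of that equivalence, with a hypothetical example_ helper name:

#include <linux/bug.h>
#include <linux/cpumask.h>

static void example_check_first_online(void)
{
	unsigned int first      = cpumask_first(cpu_online_mask);
	unsigned int next_of_m1 = cpumask_next(-1, cpu_online_mask);

	/* cpumask_next(-1, ...) and cpumask_first(...) agree by definition */
	WARN_ON(first != next_of_m1);
}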
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323f..60c78917190c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -199,12 +199,14 @@ static int __init cpuid_init(void) | |||
199 | goto out_chrdev; | 199 | goto out_chrdev; |
200 | } | 200 | } |
201 | cpuid_class->devnode = cpuid_devnode; | 201 | cpuid_class->devnode = cpuid_devnode; |
202 | get_online_cpus(); | ||
202 | for_each_online_cpu(i) { | 203 | for_each_online_cpu(i) { |
203 | err = cpuid_device_create(i); | 204 | err = cpuid_device_create(i); |
204 | if (err != 0) | 205 | if (err != 0) |
205 | goto out_class; | 206 | goto out_class; |
206 | } | 207 | } |
207 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); | 208 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); |
209 | put_online_cpus(); | ||
208 | 210 | ||
209 | err = 0; | 211 | err = 0; |
210 | goto out; | 212 | goto out; |
@@ -214,6 +216,7 @@ out_class: | |||
214 | for_each_online_cpu(i) { | 216 | for_each_online_cpu(i) { |
215 | cpuid_device_destroy(i); | 217 | cpuid_device_destroy(i); |
216 | } | 218 | } |
219 | put_online_cpus(); | ||
217 | class_destroy(cpuid_class); | 220 | class_destroy(cpuid_class); |
218 | out_chrdev: | 221 | out_chrdev: |
219 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); | 222 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) | |||
225 | { | 228 | { |
226 | int cpu = 0; | 229 | int cpu = 0; |
227 | 230 | ||
231 | get_online_cpus(); | ||
228 | for_each_online_cpu(cpu) | 232 | for_each_online_cpu(cpu) |
229 | cpuid_device_destroy(cpu); | 233 | cpuid_device_destroy(cpu); |
230 | class_destroy(cpuid_class); | 234 | class_destroy(cpuid_class); |
231 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); | 235 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
232 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); | 236 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); |
237 | put_online_cpus(); | ||
233 | } | 238 | } |
234 | 239 | ||
235 | module_init(cpuid_init); | 240 | module_init(cpuid_init); |
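The cpuid.c hunks bracket both the per-CPU device creation loop and the notifier registration (and, symmetrically, the teardown in cpuid_exit) with get_online_cpus()/put_online_cpus(), so a CPU cannot be plugged or unplugged between walking the online mask and installing the hotplug notifier. The general shape of that pattern, with a hypothetical example_setup_cpu() standing in for cpuid_device_create():

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/notifier.h>

static int example_register_per_cpu(struct notifier_block *nb,
				    int (*example_setup_cpu)(int cpu))
{
	int cpu, err = 0;

	get_online_cpus();		/* hold off hotplug transitions */
	for_each_online_cpu(cpu) {
		err = example_setup_cpu(cpu);
		if (err)
			break;
	}
	if (!err)
		register_hotcpu_notifier(nb);	/* no CPU missed in between */
	put_online_cpus();

	return err;
}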
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a874..b1581527a236 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { | |||
342 | .xlate = ioapic_xlate, | 342 | .xlate = ioapic_xlate, |
343 | }; | 343 | }; |
344 | 344 | ||
345 | static void dt_add_ioapic_domain(unsigned int ioapic_num, | ||
346 | struct device_node *np) | ||
347 | { | ||
348 | struct irq_domain *id; | ||
349 | struct mp_ioapic_gsi *gsi_cfg; | ||
350 | int ret; | ||
351 | int num; | ||
352 | |||
353 | gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); | ||
354 | num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; | ||
355 | |||
356 | id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, | ||
357 | (void *)ioapic_num); | ||
358 | BUG_ON(!id); | ||
359 | if (gsi_cfg->gsi_base == 0) { | ||
360 | /* | ||
361 | * The first NR_IRQS_LEGACY irq descs are allocated in | ||
362 | * early_irq_init() and need just a mapping. The | ||
363 | * remaining irqs need both. All of them are preallocated | ||
364 | * and assigned, so we can keep the 1:1 mapping that the ioapic | ||
365 | * already uses. | ||

366 | */ | ||
367 | ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); | ||
368 | if (ret) | ||
369 | pr_err("Error mapping legacy IRQs: %d\n", ret); | ||
370 | |||
371 | if (num > NR_IRQS_LEGACY) { | ||
372 | ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, | ||
373 | NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); | ||
374 | if (ret) | ||
375 | pr_err("Error creating mapping for the " | ||
376 | "remaining IRQs: %d\n", ret); | ||
377 | } | ||
378 | irq_set_default_host(id); | ||
379 | } else { | ||
380 | ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); | ||
381 | if (ret) | ||
382 | pr_err("Error creating IRQ mapping: %d\n", ret); | ||
383 | } | ||
384 | } | ||
385 | |||
345 | static void __init ioapic_add_ofnode(struct device_node *np) | 386 | static void __init ioapic_add_ofnode(struct device_node *np) |
346 | { | 387 | { |
347 | struct resource r; | 388 | struct resource r; |
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) | |||
356 | 397 | ||
357 | for (i = 0; i < nr_ioapics; i++) { | 398 | for (i = 0; i < nr_ioapics; i++) { |
358 | if (r.start == mpc_ioapic_addr(i)) { | 399 | if (r.start == mpc_ioapic_addr(i)) { |
359 | struct irq_domain *id; | 400 | dt_add_ioapic_domain(i, np); |
360 | struct mp_ioapic_gsi *gsi_cfg; | ||
361 | |||
362 | gsi_cfg = mp_ioapic_gsi_routing(i); | ||
363 | |||
364 | id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, | ||
365 | &ioapic_irq_domain_ops, | ||
366 | (void*)i); | ||
367 | BUG_ON(!id); | ||
368 | return; | 401 | return; |
369 | } | 402 | } |
370 | } | 403 | } |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f28837476..0750e3ba87c0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <asm/cpufeature.h> | 57 | #include <asm/cpufeature.h> |
58 | #include <asm/alternative-asm.h> | 58 | #include <asm/alternative-asm.h> |
59 | #include <asm/asm.h> | 59 | #include <asm/asm.h> |
60 | #include <asm/smap.h> | ||
60 | 61 | ||
61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 62 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
62 | #include <linux/elf-em.h> | 63 | #include <linux/elf-em.h> |
@@ -407,7 +408,9 @@ sysenter_past_esp: | |||
407 | */ | 408 | */ |
408 | cmpl $__PAGE_OFFSET-3,%ebp | 409 | cmpl $__PAGE_OFFSET-3,%ebp |
409 | jae syscall_fault | 410 | jae syscall_fault |
411 | ASM_STAC | ||
410 | 1: movl (%ebp),%ebp | 412 | 1: movl (%ebp),%ebp |
413 | ASM_CLAC | ||
411 | movl %ebp,PT_EBP(%esp) | 414 | movl %ebp,PT_EBP(%esp) |
412 | _ASM_EXTABLE(1b,syscall_fault) | 415 | _ASM_EXTABLE(1b,syscall_fault) |
413 | 416 | ||
@@ -488,6 +491,7 @@ ENDPROC(ia32_sysenter_target) | |||
488 | # system call handler stub | 491 | # system call handler stub |
489 | ENTRY(system_call) | 492 | ENTRY(system_call) |
490 | RING0_INT_FRAME # can't unwind into user space anyway | 493 | RING0_INT_FRAME # can't unwind into user space anyway |
494 | ASM_CLAC | ||
491 | pushl_cfi %eax # save orig_eax | 495 | pushl_cfi %eax # save orig_eax |
492 | SAVE_ALL | 496 | SAVE_ALL |
493 | GET_THREAD_INFO(%ebp) | 497 | GET_THREAD_INFO(%ebp) |
@@ -670,6 +674,7 @@ END(syscall_exit_work) | |||
670 | 674 | ||
671 | RING0_INT_FRAME # can't unwind into user space anyway | 675 | RING0_INT_FRAME # can't unwind into user space anyway |
672 | syscall_fault: | 676 | syscall_fault: |
677 | ASM_CLAC | ||
673 | GET_THREAD_INFO(%ebp) | 678 | GET_THREAD_INFO(%ebp) |
674 | movl $-EFAULT,PT_EAX(%esp) | 679 | movl $-EFAULT,PT_EAX(%esp) |
675 | jmp resume_userspace | 680 | jmp resume_userspace |
@@ -825,6 +830,7 @@ END(interrupt) | |||
825 | */ | 830 | */ |
826 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 831 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
827 | common_interrupt: | 832 | common_interrupt: |
833 | ASM_CLAC | ||
828 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | 834 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ |
829 | SAVE_ALL | 835 | SAVE_ALL |
830 | TRACE_IRQS_OFF | 836 | TRACE_IRQS_OFF |
@@ -841,6 +847,7 @@ ENDPROC(common_interrupt) | |||
841 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 847 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
842 | ENTRY(name) \ | 848 | ENTRY(name) \ |
843 | RING0_INT_FRAME; \ | 849 | RING0_INT_FRAME; \ |
850 | ASM_CLAC; \ | ||
844 | pushl_cfi $~(nr); \ | 851 | pushl_cfi $~(nr); \ |
845 | SAVE_ALL; \ | 852 | SAVE_ALL; \ |
846 | TRACE_IRQS_OFF \ | 853 | TRACE_IRQS_OFF \ |
@@ -857,6 +864,7 @@ ENDPROC(name) | |||
857 | 864 | ||
858 | ENTRY(coprocessor_error) | 865 | ENTRY(coprocessor_error) |
859 | RING0_INT_FRAME | 866 | RING0_INT_FRAME |
867 | ASM_CLAC | ||
860 | pushl_cfi $0 | 868 | pushl_cfi $0 |
861 | pushl_cfi $do_coprocessor_error | 869 | pushl_cfi $do_coprocessor_error |
862 | jmp error_code | 870 | jmp error_code |
@@ -865,6 +873,7 @@ END(coprocessor_error) | |||
865 | 873 | ||
866 | ENTRY(simd_coprocessor_error) | 874 | ENTRY(simd_coprocessor_error) |
867 | RING0_INT_FRAME | 875 | RING0_INT_FRAME |
876 | ASM_CLAC | ||
868 | pushl_cfi $0 | 877 | pushl_cfi $0 |
869 | #ifdef CONFIG_X86_INVD_BUG | 878 | #ifdef CONFIG_X86_INVD_BUG |
870 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | 879 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ |
@@ -886,6 +895,7 @@ END(simd_coprocessor_error) | |||
886 | 895 | ||
887 | ENTRY(device_not_available) | 896 | ENTRY(device_not_available) |
888 | RING0_INT_FRAME | 897 | RING0_INT_FRAME |
898 | ASM_CLAC | ||
889 | pushl_cfi $-1 # mark this as an int | 899 | pushl_cfi $-1 # mark this as an int |
890 | pushl_cfi $do_device_not_available | 900 | pushl_cfi $do_device_not_available |
891 | jmp error_code | 901 | jmp error_code |
@@ -906,6 +916,7 @@ END(native_irq_enable_sysexit) | |||
906 | 916 | ||
907 | ENTRY(overflow) | 917 | ENTRY(overflow) |
908 | RING0_INT_FRAME | 918 | RING0_INT_FRAME |
919 | ASM_CLAC | ||
909 | pushl_cfi $0 | 920 | pushl_cfi $0 |
910 | pushl_cfi $do_overflow | 921 | pushl_cfi $do_overflow |
911 | jmp error_code | 922 | jmp error_code |
@@ -914,6 +925,7 @@ END(overflow) | |||
914 | 925 | ||
915 | ENTRY(bounds) | 926 | ENTRY(bounds) |
916 | RING0_INT_FRAME | 927 | RING0_INT_FRAME |
928 | ASM_CLAC | ||
917 | pushl_cfi $0 | 929 | pushl_cfi $0 |
918 | pushl_cfi $do_bounds | 930 | pushl_cfi $do_bounds |
919 | jmp error_code | 931 | jmp error_code |
@@ -922,6 +934,7 @@ END(bounds) | |||
922 | 934 | ||
923 | ENTRY(invalid_op) | 935 | ENTRY(invalid_op) |
924 | RING0_INT_FRAME | 936 | RING0_INT_FRAME |
937 | ASM_CLAC | ||
925 | pushl_cfi $0 | 938 | pushl_cfi $0 |
926 | pushl_cfi $do_invalid_op | 939 | pushl_cfi $do_invalid_op |
927 | jmp error_code | 940 | jmp error_code |
@@ -930,6 +943,7 @@ END(invalid_op) | |||
930 | 943 | ||
931 | ENTRY(coprocessor_segment_overrun) | 944 | ENTRY(coprocessor_segment_overrun) |
932 | RING0_INT_FRAME | 945 | RING0_INT_FRAME |
946 | ASM_CLAC | ||
933 | pushl_cfi $0 | 947 | pushl_cfi $0 |
934 | pushl_cfi $do_coprocessor_segment_overrun | 948 | pushl_cfi $do_coprocessor_segment_overrun |
935 | jmp error_code | 949 | jmp error_code |
@@ -938,6 +952,7 @@ END(coprocessor_segment_overrun) | |||
938 | 952 | ||
939 | ENTRY(invalid_TSS) | 953 | ENTRY(invalid_TSS) |
940 | RING0_EC_FRAME | 954 | RING0_EC_FRAME |
955 | ASM_CLAC | ||
941 | pushl_cfi $do_invalid_TSS | 956 | pushl_cfi $do_invalid_TSS |
942 | jmp error_code | 957 | jmp error_code |
943 | CFI_ENDPROC | 958 | CFI_ENDPROC |
@@ -945,6 +960,7 @@ END(invalid_TSS) | |||
945 | 960 | ||
946 | ENTRY(segment_not_present) | 961 | ENTRY(segment_not_present) |
947 | RING0_EC_FRAME | 962 | RING0_EC_FRAME |
963 | ASM_CLAC | ||
948 | pushl_cfi $do_segment_not_present | 964 | pushl_cfi $do_segment_not_present |
949 | jmp error_code | 965 | jmp error_code |
950 | CFI_ENDPROC | 966 | CFI_ENDPROC |
@@ -952,6 +968,7 @@ END(segment_not_present) | |||
952 | 968 | ||
953 | ENTRY(stack_segment) | 969 | ENTRY(stack_segment) |
954 | RING0_EC_FRAME | 970 | RING0_EC_FRAME |
971 | ASM_CLAC | ||
955 | pushl_cfi $do_stack_segment | 972 | pushl_cfi $do_stack_segment |
956 | jmp error_code | 973 | jmp error_code |
957 | CFI_ENDPROC | 974 | CFI_ENDPROC |
@@ -959,6 +976,7 @@ END(stack_segment) | |||
959 | 976 | ||
960 | ENTRY(alignment_check) | 977 | ENTRY(alignment_check) |
961 | RING0_EC_FRAME | 978 | RING0_EC_FRAME |
979 | ASM_CLAC | ||
962 | pushl_cfi $do_alignment_check | 980 | pushl_cfi $do_alignment_check |
963 | jmp error_code | 981 | jmp error_code |
964 | CFI_ENDPROC | 982 | CFI_ENDPROC |
@@ -966,6 +984,7 @@ END(alignment_check) | |||
966 | 984 | ||
967 | ENTRY(divide_error) | 985 | ENTRY(divide_error) |
968 | RING0_INT_FRAME | 986 | RING0_INT_FRAME |
987 | ASM_CLAC | ||
969 | pushl_cfi $0 # no error code | 988 | pushl_cfi $0 # no error code |
970 | pushl_cfi $do_divide_error | 989 | pushl_cfi $do_divide_error |
971 | jmp error_code | 990 | jmp error_code |
@@ -975,6 +994,7 @@ END(divide_error) | |||
975 | #ifdef CONFIG_X86_MCE | 994 | #ifdef CONFIG_X86_MCE |
976 | ENTRY(machine_check) | 995 | ENTRY(machine_check) |
977 | RING0_INT_FRAME | 996 | RING0_INT_FRAME |
997 | ASM_CLAC | ||
978 | pushl_cfi $0 | 998 | pushl_cfi $0 |
979 | pushl_cfi machine_check_vector | 999 | pushl_cfi machine_check_vector |
980 | jmp error_code | 1000 | jmp error_code |
@@ -984,6 +1004,7 @@ END(machine_check) | |||
984 | 1004 | ||
985 | ENTRY(spurious_interrupt_bug) | 1005 | ENTRY(spurious_interrupt_bug) |
986 | RING0_INT_FRAME | 1006 | RING0_INT_FRAME |
1007 | ASM_CLAC | ||
987 | pushl_cfi $0 | 1008 | pushl_cfi $0 |
988 | pushl_cfi $do_spurious_interrupt_bug | 1009 | pushl_cfi $do_spurious_interrupt_bug |
989 | jmp error_code | 1010 | jmp error_code |
@@ -1109,17 +1130,21 @@ ENTRY(ftrace_caller) | |||
1109 | pushl %eax | 1130 | pushl %eax |
1110 | pushl %ecx | 1131 | pushl %ecx |
1111 | pushl %edx | 1132 | pushl %edx |
1112 | movl 0xc(%esp), %eax | 1133 | pushl $0 /* Pass NULL as regs pointer */ |
1134 | movl 4*4(%esp), %eax | ||
1113 | movl 0x4(%ebp), %edx | 1135 | movl 0x4(%ebp), %edx |
1136 | leal function_trace_op, %ecx | ||
1114 | subl $MCOUNT_INSN_SIZE, %eax | 1137 | subl $MCOUNT_INSN_SIZE, %eax |
1115 | 1138 | ||
1116 | .globl ftrace_call | 1139 | .globl ftrace_call |
1117 | ftrace_call: | 1140 | ftrace_call: |
1118 | call ftrace_stub | 1141 | call ftrace_stub |
1119 | 1142 | ||
1143 | addl $4,%esp /* skip NULL pointer */ | ||
1120 | popl %edx | 1144 | popl %edx |
1121 | popl %ecx | 1145 | popl %ecx |
1122 | popl %eax | 1146 | popl %eax |
1147 | ftrace_ret: | ||
1123 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1148 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1124 | .globl ftrace_graph_call | 1149 | .globl ftrace_graph_call |
1125 | ftrace_graph_call: | 1150 | ftrace_graph_call: |
@@ -1131,6 +1156,71 @@ ftrace_stub: | |||
1131 | ret | 1156 | ret |
1132 | END(ftrace_caller) | 1157 | END(ftrace_caller) |
1133 | 1158 | ||
1159 | ENTRY(ftrace_regs_caller) | ||
1160 | pushf /* push flags before compare (in cs location) */ | ||
1161 | cmpl $0, function_trace_stop | ||
1162 | jne ftrace_restore_flags | ||
1163 | |||
1164 | /* | ||
1165 | * i386 does not save SS and ESP when coming from kernel. | ||
1166 | * Instead, to get sp, &regs->sp is used (see ptrace.h). | ||
1167 | * Unfortunately, that means eflags must be at the same location | ||
1168 | * as the current return ip is. We move the return ip into the | ||
1169 | * ip location, and move flags into the return ip location. | ||
1170 | */ | ||
1171 | pushl 4(%esp) /* save return ip into ip slot */ | ||
1172 | |||
1173 | pushl $0 /* Load 0 into orig_ax */ | ||
1174 | pushl %gs | ||
1175 | pushl %fs | ||
1176 | pushl %es | ||
1177 | pushl %ds | ||
1178 | pushl %eax | ||
1179 | pushl %ebp | ||
1180 | pushl %edi | ||
1181 | pushl %esi | ||
1182 | pushl %edx | ||
1183 | pushl %ecx | ||
1184 | pushl %ebx | ||
1185 | |||
1186 | movl 13*4(%esp), %eax /* Get the saved flags */ | ||
1187 | movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ | ||
1188 | /* clobbering return ip */ | ||
1189 | movl $__KERNEL_CS,13*4(%esp) | ||
1190 | |||
1191 | movl 12*4(%esp), %eax /* Load ip (1st parameter) */ | ||
1192 | subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ | ||
1193 | movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ | ||
1194 | leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ | ||
1195 | pushl %esp /* Save pt_regs as 4th parameter */ | ||
1196 | |||
1197 | GLOBAL(ftrace_regs_call) | ||
1198 | call ftrace_stub | ||
1199 | |||
1200 | addl $4, %esp /* Skip pt_regs */ | ||
1201 | movl 14*4(%esp), %eax /* Move flags back into cs */ | ||
1202 | movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ | ||
1203 | movl 12*4(%esp), %eax /* Get return ip from regs->ip */ | ||
1204 | movl %eax, 14*4(%esp) /* Put return ip back for ret */ | ||
1205 | |||
1206 | popl %ebx | ||
1207 | popl %ecx | ||
1208 | popl %edx | ||
1209 | popl %esi | ||
1210 | popl %edi | ||
1211 | popl %ebp | ||
1212 | popl %eax | ||
1213 | popl %ds | ||
1214 | popl %es | ||
1215 | popl %fs | ||
1216 | popl %gs | ||
1217 | addl $8, %esp /* Skip orig_ax and ip */ | ||
1218 | popf /* Pop flags at end (no addl to corrupt flags) */ | ||
1219 | jmp ftrace_ret | ||
1220 | |||
1221 | ftrace_restore_flags: | ||
1222 | popf | ||
1223 | jmp ftrace_stub | ||
1134 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 1224 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1135 | 1225 | ||
1136 | ENTRY(mcount) | 1226 | ENTRY(mcount) |
@@ -1171,9 +1261,6 @@ END(mcount) | |||
1171 | 1261 | ||
1172 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1262 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1173 | ENTRY(ftrace_graph_caller) | 1263 | ENTRY(ftrace_graph_caller) |
1174 | cmpl $0, function_trace_stop | ||
1175 | jne ftrace_stub | ||
1176 | |||
1177 | pushl %eax | 1264 | pushl %eax |
1178 | pushl %ecx | 1265 | pushl %ecx |
1179 | pushl %edx | 1266 | pushl %edx |
@@ -1207,6 +1294,7 @@ return_to_handler: | |||
1207 | 1294 | ||
1208 | ENTRY(page_fault) | 1295 | ENTRY(page_fault) |
1209 | RING0_EC_FRAME | 1296 | RING0_EC_FRAME |
1297 | ASM_CLAC | ||
1210 | pushl_cfi $do_page_fault | 1298 | pushl_cfi $do_page_fault |
1211 | ALIGN | 1299 | ALIGN |
1212 | error_code: | 1300 | error_code: |
@@ -1279,6 +1367,7 @@ END(page_fault) | |||
1279 | 1367 | ||
1280 | ENTRY(debug) | 1368 | ENTRY(debug) |
1281 | RING0_INT_FRAME | 1369 | RING0_INT_FRAME |
1370 | ASM_CLAC | ||
1282 | cmpl $ia32_sysenter_target,(%esp) | 1371 | cmpl $ia32_sysenter_target,(%esp) |
1283 | jne debug_stack_correct | 1372 | jne debug_stack_correct |
1284 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | 1373 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
@@ -1303,6 +1392,7 @@ END(debug) | |||
1303 | */ | 1392 | */ |
1304 | ENTRY(nmi) | 1393 | ENTRY(nmi) |
1305 | RING0_INT_FRAME | 1394 | RING0_INT_FRAME |
1395 | ASM_CLAC | ||
1306 | pushl_cfi %eax | 1396 | pushl_cfi %eax |
1307 | movl %ss, %eax | 1397 | movl %ss, %eax |
1308 | cmpw $__ESPFIX_SS, %ax | 1398 | cmpw $__ESPFIX_SS, %ax |
@@ -1373,6 +1463,7 @@ END(nmi) | |||
1373 | 1463 | ||
1374 | ENTRY(int3) | 1464 | ENTRY(int3) |
1375 | RING0_INT_FRAME | 1465 | RING0_INT_FRAME |
1466 | ASM_CLAC | ||
1376 | pushl_cfi $-1 # mark this as an int | 1467 | pushl_cfi $-1 # mark this as an int |
1377 | SAVE_ALL | 1468 | SAVE_ALL |
1378 | TRACE_IRQS_OFF | 1469 | TRACE_IRQS_OFF |
@@ -1393,6 +1484,7 @@ END(general_protection) | |||
1393 | #ifdef CONFIG_KVM_GUEST | 1484 | #ifdef CONFIG_KVM_GUEST |
1394 | ENTRY(async_page_fault) | 1485 | ENTRY(async_page_fault) |
1395 | RING0_EC_FRAME | 1486 | RING0_EC_FRAME |
1487 | ASM_CLAC | ||
1396 | pushl_cfi $do_async_page_fault | 1488 | pushl_cfi $do_async_page_fault |
1397 | jmp error_code | 1489 | jmp error_code |
1398 | CFI_ENDPROC | 1490 | CFI_ENDPROC |
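The ASM_CLAC/ASM_STAC additions scattered through entry_32.S are the 32-bit half of the SMAP support: once CR4.SMAP is set, any kernel access to a user page faults unless EFLAGS.AC is set, so every entry point clears AC on the way in, and the one deliberate user read on the sysenter path (the sixth-argument fetch through %ebp) is wrapped in ASM_STAC/ASM_CLAC. In C the same bracket looks roughly like the sketch below; it assumes the clac()/stac() helpers from <asm/smap.h> and, for brevity, uses a plain load where the real path relies on an exception-table-protected one.

#include <asm/smap.h>
#include <linux/compiler.h>

static unsigned long example_fetch_sixth_arg(unsigned long __user *ubp)
{
	unsigned long v;

	stac();			/* open the SMAP window for one user read */
	v = *ubp;		/* simplified: the real code uses _ASM_EXTABLE
				 * so a bad pointer faults to syscall_fault */
	clac();			/* close the window again immediately     */

	return v;
}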
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834f..44531acd9a81 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -56,6 +56,8 @@ | |||
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <asm/asm.h> | 58 | #include <asm/asm.h> |
59 | #include <asm/rcu.h> | ||
60 | #include <asm/smap.h> | ||
59 | #include <linux/err.h> | 61 | #include <linux/err.h> |
60 | 62 | ||
61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 63 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
@@ -68,25 +70,51 @@ | |||
68 | .section .entry.text, "ax" | 70 | .section .entry.text, "ax" |
69 | 71 | ||
70 | #ifdef CONFIG_FUNCTION_TRACER | 72 | #ifdef CONFIG_FUNCTION_TRACER |
73 | |||
74 | #ifdef CC_USING_FENTRY | ||
75 | # define function_hook __fentry__ | ||
76 | #else | ||
77 | # define function_hook mcount | ||
78 | #endif | ||
79 | |||
71 | #ifdef CONFIG_DYNAMIC_FTRACE | 80 | #ifdef CONFIG_DYNAMIC_FTRACE |
72 | ENTRY(mcount) | 81 | |
82 | ENTRY(function_hook) | ||
73 | retq | 83 | retq |
74 | END(mcount) | 84 | END(function_hook) |
85 | |||
86 | /* skip is set if stack has been adjusted */ | ||
87 | .macro ftrace_caller_setup skip=0 | ||
88 | MCOUNT_SAVE_FRAME \skip | ||
89 | |||
90 | /* Load the ftrace_ops into the 3rd parameter */ | ||
91 | leaq function_trace_op, %rdx | ||
92 | |||
93 | /* Load ip into the first parameter */ | ||
94 | movq RIP(%rsp), %rdi | ||
95 | subq $MCOUNT_INSN_SIZE, %rdi | ||
96 | /* Load the parent_ip into the second parameter */ | ||
97 | #ifdef CC_USING_FENTRY | ||
98 | movq SS+16(%rsp), %rsi | ||
99 | #else | ||
100 | movq 8(%rbp), %rsi | ||
101 | #endif | ||
102 | .endm | ||
75 | 103 | ||
76 | ENTRY(ftrace_caller) | 104 | ENTRY(ftrace_caller) |
105 | /* Check if tracing was disabled (quick check) */ | ||
77 | cmpl $0, function_trace_stop | 106 | cmpl $0, function_trace_stop |
78 | jne ftrace_stub | 107 | jne ftrace_stub |
79 | 108 | ||
80 | MCOUNT_SAVE_FRAME | 109 | ftrace_caller_setup |
81 | 110 | /* regs go into 4th parameter (but make it NULL) */ | |
82 | movq 0x38(%rsp), %rdi | 111 | movq $0, %rcx |
83 | movq 8(%rbp), %rsi | ||
84 | subq $MCOUNT_INSN_SIZE, %rdi | ||
85 | 112 | ||
86 | GLOBAL(ftrace_call) | 113 | GLOBAL(ftrace_call) |
87 | call ftrace_stub | 114 | call ftrace_stub |
88 | 115 | ||
89 | MCOUNT_RESTORE_FRAME | 116 | MCOUNT_RESTORE_FRAME |
117 | ftrace_return: | ||
90 | 118 | ||
91 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 119 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
92 | GLOBAL(ftrace_graph_call) | 120 | GLOBAL(ftrace_graph_call) |
@@ -97,8 +125,78 @@ GLOBAL(ftrace_stub) | |||
97 | retq | 125 | retq |
98 | END(ftrace_caller) | 126 | END(ftrace_caller) |
99 | 127 | ||
128 | ENTRY(ftrace_regs_caller) | ||
129 | /* Save the current flags before compare (in SS location)*/ | ||
130 | pushfq | ||
131 | |||
132 | /* Check if tracing was disabled (quick check) */ | ||
133 | cmpl $0, function_trace_stop | ||
134 | jne ftrace_restore_flags | ||
135 | |||
136 | /* skip=8 to skip flags saved in SS */ | ||
137 | ftrace_caller_setup 8 | ||
138 | |||
139 | /* Save the rest of pt_regs */ | ||
140 | movq %r15, R15(%rsp) | ||
141 | movq %r14, R14(%rsp) | ||
142 | movq %r13, R13(%rsp) | ||
143 | movq %r12, R12(%rsp) | ||
144 | movq %r11, R11(%rsp) | ||
145 | movq %r10, R10(%rsp) | ||
146 | movq %rbp, RBP(%rsp) | ||
147 | movq %rbx, RBX(%rsp) | ||
148 | /* Copy saved flags */ | ||
149 | movq SS(%rsp), %rcx | ||
150 | movq %rcx, EFLAGS(%rsp) | ||
151 | /* Kernel segments */ | ||
152 | movq $__KERNEL_DS, %rcx | ||
153 | movq %rcx, SS(%rsp) | ||
154 | movq $__KERNEL_CS, %rcx | ||
155 | movq %rcx, CS(%rsp) | ||
156 | /* Stack - skipping return address */ | ||
157 | leaq SS+16(%rsp), %rcx | ||
158 | movq %rcx, RSP(%rsp) | ||
159 | |||
160 | /* regs go into 4th parameter */ | ||
161 | leaq (%rsp), %rcx | ||
162 | |||
163 | GLOBAL(ftrace_regs_call) | ||
164 | call ftrace_stub | ||
165 | |||
166 | /* Copy flags back to SS, to restore them */ | ||
167 | movq EFLAGS(%rsp), %rax | ||
168 | movq %rax, SS(%rsp) | ||
169 | |||
170 | /* Handlers can change the RIP */ | ||
171 | movq RIP(%rsp), %rax | ||
172 | movq %rax, SS+8(%rsp) | ||
173 | |||
174 | /* restore the rest of pt_regs */ | ||
175 | movq R15(%rsp), %r15 | ||
176 | movq R14(%rsp), %r14 | ||
177 | movq R13(%rsp), %r13 | ||
178 | movq R12(%rsp), %r12 | ||
179 | movq R10(%rsp), %r10 | ||
180 | movq RBP(%rsp), %rbp | ||
181 | movq RBX(%rsp), %rbx | ||
182 | |||
183 | /* skip=8 to skip flags saved in SS */ | ||
184 | MCOUNT_RESTORE_FRAME 8 | ||
185 | |||
186 | /* Restore flags */ | ||
187 | popfq | ||
188 | |||
189 | jmp ftrace_return | ||
190 | ftrace_restore_flags: | ||
191 | popfq | ||
192 | jmp ftrace_stub | ||
193 | |||
194 | END(ftrace_regs_caller) | ||
195 | |||
196 | |||
100 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 197 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
101 | ENTRY(mcount) | 198 | |
199 | ENTRY(function_hook) | ||
102 | cmpl $0, function_trace_stop | 200 | cmpl $0, function_trace_stop |
103 | jne ftrace_stub | 201 | jne ftrace_stub |
104 | 202 | ||
@@ -119,8 +217,12 @@ GLOBAL(ftrace_stub) | |||
119 | trace: | 217 | trace: |
120 | MCOUNT_SAVE_FRAME | 218 | MCOUNT_SAVE_FRAME |
121 | 219 | ||
122 | movq 0x38(%rsp), %rdi | 220 | movq RIP(%rsp), %rdi |
221 | #ifdef CC_USING_FENTRY | ||
222 | movq SS+16(%rsp), %rsi | ||
223 | #else | ||
123 | movq 8(%rbp), %rsi | 224 | movq 8(%rbp), %rsi |
225 | #endif | ||
124 | subq $MCOUNT_INSN_SIZE, %rdi | 226 | subq $MCOUNT_INSN_SIZE, %rdi |
125 | 227 | ||
126 | call *ftrace_trace_function | 228 | call *ftrace_trace_function |
@@ -128,20 +230,22 @@ trace: | |||
128 | MCOUNT_RESTORE_FRAME | 230 | MCOUNT_RESTORE_FRAME |
129 | 231 | ||
130 | jmp ftrace_stub | 232 | jmp ftrace_stub |
131 | END(mcount) | 233 | END(function_hook) |
132 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 234 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
133 | #endif /* CONFIG_FUNCTION_TRACER */ | 235 | #endif /* CONFIG_FUNCTION_TRACER */ |
134 | 236 | ||
135 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 237 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
136 | ENTRY(ftrace_graph_caller) | 238 | ENTRY(ftrace_graph_caller) |
137 | cmpl $0, function_trace_stop | ||
138 | jne ftrace_stub | ||
139 | |||
140 | MCOUNT_SAVE_FRAME | 239 | MCOUNT_SAVE_FRAME |
141 | 240 | ||
241 | #ifdef CC_USING_FENTRY | ||
242 | leaq SS+16(%rsp), %rdi | ||
243 | movq $0, %rdx /* No framepointers needed */ | ||
244 | #else | ||
142 | leaq 8(%rbp), %rdi | 245 | leaq 8(%rbp), %rdi |
143 | movq 0x38(%rsp), %rsi | ||
144 | movq (%rbp), %rdx | 246 | movq (%rbp), %rdx |
247 | #endif | ||
248 | movq RIP(%rsp), %rsi | ||
145 | subq $MCOUNT_INSN_SIZE, %rsi | 249 | subq $MCOUNT_INSN_SIZE, %rsi |
146 | 250 | ||
147 | call prepare_ftrace_return | 251 | call prepare_ftrace_return |
@@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64) | |||
342 | .macro SAVE_ARGS_IRQ | 446 | .macro SAVE_ARGS_IRQ |
343 | cld | 447 | cld |
344 | /* start from rbp in pt_regs and jump over */ | 448 | /* start from rbp in pt_regs and jump over */ |
345 | movq_cfi rdi, RDI-RBP | 449 | movq_cfi rdi, (RDI-RBP) |
346 | movq_cfi rsi, RSI-RBP | 450 | movq_cfi rsi, (RSI-RBP) |
347 | movq_cfi rdx, RDX-RBP | 451 | movq_cfi rdx, (RDX-RBP) |
348 | movq_cfi rcx, RCX-RBP | 452 | movq_cfi rcx, (RCX-RBP) |
349 | movq_cfi rax, RAX-RBP | 453 | movq_cfi rax, (RAX-RBP) |
350 | movq_cfi r8, R8-RBP | 454 | movq_cfi r8, (R8-RBP) |
351 | movq_cfi r9, R9-RBP | 455 | movq_cfi r9, (R9-RBP) |
352 | movq_cfi r10, R10-RBP | 456 | movq_cfi r10, (R10-RBP) |
353 | movq_cfi r11, R11-RBP | 457 | movq_cfi r11, (R11-RBP) |
354 | 458 | ||
355 | /* Save rbp so that we can unwind from get_irq_regs() */ | 459 | /* Save rbp so that we can unwind from get_irq_regs() */ |
356 | movq_cfi rbp, 0 | 460 | movq_cfi rbp, 0 |
@@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64) | |||
384 | .endm | 488 | .endm |
385 | 489 | ||
386 | ENTRY(save_rest) | 490 | ENTRY(save_rest) |
387 | PARTIAL_FRAME 1 REST_SKIP+8 | 491 | PARTIAL_FRAME 1 (REST_SKIP+8) |
388 | movq 5*8+16(%rsp), %r11 /* save return address */ | 492 | movq 5*8+16(%rsp), %r11 /* save return address */ |
389 | movq_cfi rbx, RBX+16 | 493 | movq_cfi rbx, RBX+16 |
390 | movq_cfi rbp, RBP+16 | 494 | movq_cfi rbp, RBP+16 |
@@ -440,7 +544,7 @@ ENTRY(ret_from_fork) | |||
440 | 544 | ||
441 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | 545 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
442 | 546 | ||
443 | pushq_cfi kernel_eflags(%rip) | 547 | pushq_cfi $0x0002 |
444 | popfq_cfi # reset kernel eflags | 548 | popfq_cfi # reset kernel eflags |
445 | 549 | ||
446 | call schedule_tail # rdi: 'prev' task parameter | 550 | call schedule_tail # rdi: 'prev' task parameter |
@@ -465,7 +569,8 @@ END(ret_from_fork) | |||
465 | * System call entry. Up to 6 arguments in registers are supported. | 569 | * System call entry. Up to 6 arguments in registers are supported. |
466 | * | 570 | * |
467 | * SYSCALL does not save anything on the stack and does not change the | 571 | * SYSCALL does not save anything on the stack and does not change the |
468 | * stack pointer. | 572 | * stack pointer. However, it does mask the flags register for us, so |
573 | * CLD and CLAC are not needed. | ||
469 | */ | 574 | */ |
470 | 575 | ||
471 | /* | 576 | /* |
@@ -565,7 +670,7 @@ sysret_careful: | |||
565 | TRACE_IRQS_ON | 670 | TRACE_IRQS_ON |
566 | ENABLE_INTERRUPTS(CLBR_NONE) | 671 | ENABLE_INTERRUPTS(CLBR_NONE) |
567 | pushq_cfi %rdi | 672 | pushq_cfi %rdi |
568 | call schedule | 673 | SCHEDULE_USER |
569 | popq_cfi %rdi | 674 | popq_cfi %rdi |
570 | jmp sysret_check | 675 | jmp sysret_check |
571 | 676 | ||
@@ -678,7 +783,7 @@ int_careful: | |||
678 | TRACE_IRQS_ON | 783 | TRACE_IRQS_ON |
679 | ENABLE_INTERRUPTS(CLBR_NONE) | 784 | ENABLE_INTERRUPTS(CLBR_NONE) |
680 | pushq_cfi %rdi | 785 | pushq_cfi %rdi |
681 | call schedule | 786 | SCHEDULE_USER |
682 | popq_cfi %rdi | 787 | popq_cfi %rdi |
683 | DISABLE_INTERRUPTS(CLBR_NONE) | 788 | DISABLE_INTERRUPTS(CLBR_NONE) |
684 | TRACE_IRQS_OFF | 789 | TRACE_IRQS_OFF |
@@ -884,6 +989,7 @@ END(interrupt) | |||
884 | */ | 989 | */ |
885 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 990 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
886 | common_interrupt: | 991 | common_interrupt: |
992 | ASM_CLAC | ||
887 | XCPT_FRAME | 993 | XCPT_FRAME |
888 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 994 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
889 | interrupt do_IRQ | 995 | interrupt do_IRQ |
@@ -974,7 +1080,7 @@ retint_careful: | |||
974 | TRACE_IRQS_ON | 1080 | TRACE_IRQS_ON |
975 | ENABLE_INTERRUPTS(CLBR_NONE) | 1081 | ENABLE_INTERRUPTS(CLBR_NONE) |
976 | pushq_cfi %rdi | 1082 | pushq_cfi %rdi |
977 | call schedule | 1083 | SCHEDULE_USER |
978 | popq_cfi %rdi | 1084 | popq_cfi %rdi |
979 | GET_THREAD_INFO(%rcx) | 1085 | GET_THREAD_INFO(%rcx) |
980 | DISABLE_INTERRUPTS(CLBR_NONE) | 1086 | DISABLE_INTERRUPTS(CLBR_NONE) |
@@ -1023,6 +1129,7 @@ END(common_interrupt) | |||
1023 | */ | 1129 | */ |
1024 | .macro apicinterrupt num sym do_sym | 1130 | .macro apicinterrupt num sym do_sym |
1025 | ENTRY(\sym) | 1131 | ENTRY(\sym) |
1132 | ASM_CLAC | ||
1026 | INTR_FRAME | 1133 | INTR_FRAME |
1027 | pushq_cfi $~(\num) | 1134 | pushq_cfi $~(\num) |
1028 | .Lcommon_\sym: | 1135 | .Lcommon_\sym: |
@@ -1077,6 +1184,7 @@ apicinterrupt IRQ_WORK_VECTOR \ | |||
1077 | */ | 1184 | */ |
1078 | .macro zeroentry sym do_sym | 1185 | .macro zeroentry sym do_sym |
1079 | ENTRY(\sym) | 1186 | ENTRY(\sym) |
1187 | ASM_CLAC | ||
1080 | INTR_FRAME | 1188 | INTR_FRAME |
1081 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1189 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1082 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1190 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
@@ -1094,6 +1202,7 @@ END(\sym) | |||
1094 | 1202 | ||
1095 | .macro paranoidzeroentry sym do_sym | 1203 | .macro paranoidzeroentry sym do_sym |
1096 | ENTRY(\sym) | 1204 | ENTRY(\sym) |
1205 | ASM_CLAC | ||
1097 | INTR_FRAME | 1206 | INTR_FRAME |
1098 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1207 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1099 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1208 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
@@ -1112,6 +1221,7 @@ END(\sym) | |||
1112 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | 1221 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) |
1113 | .macro paranoidzeroentry_ist sym do_sym ist | 1222 | .macro paranoidzeroentry_ist sym do_sym ist |
1114 | ENTRY(\sym) | 1223 | ENTRY(\sym) |
1224 | ASM_CLAC | ||
1115 | INTR_FRAME | 1225 | INTR_FRAME |
1116 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1117 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1227 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
@@ -1131,6 +1241,7 @@ END(\sym) | |||
1131 | 1241 | ||
1132 | .macro errorentry sym do_sym | 1242 | .macro errorentry sym do_sym |
1133 | ENTRY(\sym) | 1243 | ENTRY(\sym) |
1244 | ASM_CLAC | ||
1134 | XCPT_FRAME | 1245 | XCPT_FRAME |
1135 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1246 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1136 | subq $ORIG_RAX-R15, %rsp | 1247 | subq $ORIG_RAX-R15, %rsp |
@@ -1149,6 +1260,7 @@ END(\sym) | |||
1149 | /* error code is on the stack already */ | 1260 | /* error code is on the stack already */ |
1150 | .macro paranoiderrorentry sym do_sym | 1261 | .macro paranoiderrorentry sym do_sym |
1151 | ENTRY(\sym) | 1262 | ENTRY(\sym) |
1263 | ASM_CLAC | ||
1152 | XCPT_FRAME | 1264 | XCPT_FRAME |
1153 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1265 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1154 | subq $ORIG_RAX-R15, %rsp | 1266 | subq $ORIG_RAX-R15, %rsp |
@@ -1449,7 +1561,7 @@ paranoid_userspace: | |||
1449 | paranoid_schedule: | 1561 | paranoid_schedule: |
1450 | TRACE_IRQS_ON | 1562 | TRACE_IRQS_ON |
1451 | ENABLE_INTERRUPTS(CLBR_ANY) | 1563 | ENABLE_INTERRUPTS(CLBR_ANY) |
1452 | call schedule | 1564 | SCHEDULE_USER |
1453 | DISABLE_INTERRUPTS(CLBR_ANY) | 1565 | DISABLE_INTERRUPTS(CLBR_ANY) |
1454 | TRACE_IRQS_OFF | 1566 | TRACE_IRQS_OFF |
1455 | jmp paranoid_userspace | 1567 | jmp paranoid_userspace |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e6..1d414029f1d8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -206,6 +206,21 @@ static int | |||
206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, |
207 | unsigned const char *new_code); | 207 | unsigned const char *new_code); |
208 | 208 | ||
209 | /* | ||
210 | * Should never be called: | ||
211 | * As it is only called by __ftrace_replace_code() which is called by | ||
212 | * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() | ||
213 | * which is called to turn mcount into nops or nops into function calls | ||
214 | * but not to convert a function from not using regs to one that uses | ||
215 | * regs, which ftrace_modify_call() is for. | ||
216 | */ | ||
217 | int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, | ||
218 | unsigned long addr) | ||
219 | { | ||
220 | WARN_ON(1); | ||
221 | return -EINVAL; | ||
222 | } | ||
223 | |||
209 | int ftrace_update_ftrace_func(ftrace_func_t func) | 224 | int ftrace_update_ftrace_func(ftrace_func_t func) |
210 | { | 225 | { |
211 | unsigned long ip = (unsigned long)(&ftrace_call); | 226 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
220 | 235 | ||
221 | ret = ftrace_modify_code(ip, old, new); | 236 | ret = ftrace_modify_code(ip, old, new); |
222 | 237 | ||
238 | /* Also update the regs callback function */ | ||
239 | if (!ret) { | ||
240 | ip = (unsigned long)(&ftrace_regs_call); | ||
241 | memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); | ||
242 | new = ftrace_call_replace(ip, (unsigned long)func); | ||
243 | ret = ftrace_modify_code(ip, old, new); | ||
244 | } | ||
245 | |||
223 | atomic_dec(&modifying_ftrace_code); | 246 | atomic_dec(&modifying_ftrace_code); |
224 | 247 | ||
225 | return ret; | 248 | return ret; |
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) | |||
299 | return add_break(rec->ip, old); | 322 | return add_break(rec->ip, old); |
300 | } | 323 | } |
301 | 324 | ||
325 | /* | ||
326 | * If the record has the FTRACE_FL_REGS set, that means that it | ||
327 | * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS | ||
328 | * is not not set, then it wants to convert to the normal callback. | ||
329 | */ | ||
330 | static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) | ||
331 | { | ||
332 | if (rec->flags & FTRACE_FL_REGS) | ||
333 | return (unsigned long)FTRACE_REGS_ADDR; | ||
334 | else | ||
335 | return (unsigned long)FTRACE_ADDR; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * The FTRACE_FL_REGS_EN is set when the record already points to | ||
340 | * a function that saves all the regs. Basically the '_EN' version | ||
341 | * represents the current state of the function. | ||
342 | */ | ||
343 | static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) | ||
344 | { | ||
345 | if (rec->flags & FTRACE_FL_REGS_EN) | ||
346 | return (unsigned long)FTRACE_REGS_ADDR; | ||
347 | else | ||
348 | return (unsigned long)FTRACE_ADDR; | ||
349 | } | ||
350 | |||
302 | static int add_breakpoints(struct dyn_ftrace *rec, int enable) | 351 | static int add_breakpoints(struct dyn_ftrace *rec, int enable) |
303 | { | 352 | { |
304 | unsigned long ftrace_addr; | 353 | unsigned long ftrace_addr; |
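get_ftrace_addr()/get_ftrace_old_addr() choose between the plain trampoline (FTRACE_ADDR, i.e. ftrace_caller) and the new register-saving one (FTRACE_REGS_ADDR, i.e. ftrace_regs_caller) based on the FTRACE_FL_REGS/FTRACE_FL_REGS_EN record flags. The consumer of this plumbing is an ftrace_ops registered with FTRACE_OPS_FL_SAVE_REGS, whose callback then receives a populated pt_regs. A hedged sketch, assuming the four-argument ftrace_func_t signature introduced alongside this series; the example_* names are hypothetical:

#include <linux/ftrace.h>
#include <linux/init.h>
#include <linux/ptrace.h>

static void example_trace_func(unsigned long ip, unsigned long parent_ip,
			       struct ftrace_ops *op, struct pt_regs *regs)
{
	/* regs is only populated for SAVE_REGS users; it may be inspected
	 * or even have regs->ip rewritten (the basis for kprobes-on-ftrace). */
}

static struct ftrace_ops example_ops = {
	.func  = example_trace_func,
	.flags = FTRACE_OPS_FL_SAVE_REGS,
};

static int __init example_ftrace_init(void)
{
	return register_ftrace_function(&example_ops);
}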
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) | |||
306 | 355 | ||
307 | ret = ftrace_test_record(rec, enable); | 356 | ret = ftrace_test_record(rec, enable); |
308 | 357 | ||
309 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 358 | ftrace_addr = get_ftrace_addr(rec); |
310 | 359 | ||
311 | switch (ret) { | 360 | switch (ret) { |
312 | case FTRACE_UPDATE_IGNORE: | 361 | case FTRACE_UPDATE_IGNORE: |
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) | |||
316 | /* converting nop to call */ | 365 | /* converting nop to call */ |
317 | return add_brk_on_nop(rec); | 366 | return add_brk_on_nop(rec); |
318 | 367 | ||
368 | case FTRACE_UPDATE_MODIFY_CALL_REGS: | ||
369 | case FTRACE_UPDATE_MODIFY_CALL: | ||
370 | ftrace_addr = get_ftrace_old_addr(rec); | ||
371 | /* fall through */ | ||
319 | case FTRACE_UPDATE_MAKE_NOP: | 372 | case FTRACE_UPDATE_MAKE_NOP: |
320 | /* converting a call to a nop */ | 373 | /* converting a call to a nop */ |
321 | return add_brk_on_call(rec, ftrace_addr); | 374 | return add_brk_on_call(rec, ftrace_addr); |
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) | |||
360 | * If not, don't touch the breakpoint, we may just create | 413 |
361 | * a disaster. | 414 | * a disaster. |
362 | */ | 415 | */ |
363 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 416 | ftrace_addr = get_ftrace_addr(rec); |
417 | nop = ftrace_call_replace(ip, ftrace_addr); | ||
418 | |||
419 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) | ||
420 | goto update; | ||
421 | |||
422 | /* Check both ftrace_addr and ftrace_old_addr */ | ||
423 | ftrace_addr = get_ftrace_old_addr(rec); | ||
364 | nop = ftrace_call_replace(ip, ftrace_addr); | 424 | nop = ftrace_call_replace(ip, ftrace_addr); |
365 | 425 | ||
366 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) | 426 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) |
367 | return -EINVAL; | 427 | return -EINVAL; |
368 | } | 428 | } |
369 | 429 | ||
430 | update: | ||
370 | return probe_kernel_write((void *)ip, &nop[0], 1); | 431 | return probe_kernel_write((void *)ip, &nop[0], 1); |
371 | } | 432 | } |
372 | 433 | ||
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) | |||
405 | 466 | ||
406 | ret = ftrace_test_record(rec, enable); | 467 | ret = ftrace_test_record(rec, enable); |
407 | 468 | ||
408 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 469 | ftrace_addr = get_ftrace_addr(rec); |
409 | 470 | ||
410 | switch (ret) { | 471 | switch (ret) { |
411 | case FTRACE_UPDATE_IGNORE: | 472 | case FTRACE_UPDATE_IGNORE: |
412 | return 0; | 473 | return 0; |
413 | 474 | ||
475 | case FTRACE_UPDATE_MODIFY_CALL_REGS: | ||
476 | case FTRACE_UPDATE_MODIFY_CALL: | ||
414 | case FTRACE_UPDATE_MAKE_CALL: | 477 | case FTRACE_UPDATE_MAKE_CALL: |
415 | /* converting nop to call */ | 478 | /* converting nop to call */ |
416 | return add_update_call(rec, ftrace_addr); | 479 | return add_update_call(rec, ftrace_addr); |
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) | |||
455 | 518 | ||
456 | ret = ftrace_update_record(rec, enable); | 519 | ret = ftrace_update_record(rec, enable); |
457 | 520 | ||
458 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 521 | ftrace_addr = get_ftrace_addr(rec); |
459 | 522 | ||
460 | switch (ret) { | 523 | switch (ret) { |
461 | case FTRACE_UPDATE_IGNORE: | 524 | case FTRACE_UPDATE_IGNORE: |
462 | return 0; | 525 | return 0; |
463 | 526 | ||
527 | case FTRACE_UPDATE_MODIFY_CALL_REGS: | ||
528 | case FTRACE_UPDATE_MODIFY_CALL: | ||
464 | case FTRACE_UPDATE_MAKE_CALL: | 529 | case FTRACE_UPDATE_MAKE_CALL: |
465 | /* converting nop to call */ | 530 | /* converting nop to call */ |
466 | return finish_update_call(rec, ftrace_addr); | 531 | return finish_update_call(rec, ftrace_addr); |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d42ab17b7397..957a47aec64e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -287,27 +287,28 @@ ENTRY(startup_32_smp) | |||
287 | leal -__PAGE_OFFSET(%ecx),%esp | 287 | leal -__PAGE_OFFSET(%ecx),%esp |
288 | 288 | ||
289 | default_entry: | 289 | default_entry: |
290 | |||
291 | /* | 290 | /* |
292 | * New page tables may be in 4Mbyte page mode and may | 291 | * New page tables may be in 4Mbyte page mode and may |
293 | * be using the global pages. | 292 | * be using the global pages. |
294 | * | 293 | * |
295 | * NOTE! If we are on a 486 we may have no cr4 at all! | 294 | * NOTE! If we are on a 486 we may have no cr4 at all! |
296 | * So we do not try to touch it unless we really have | 295 | * Specifically, cr4 exists if and only if CPUID exists, |
297 | * some bits in it to set. This won't work if the BSP | 296 | * which in turn exists if and only if EFLAGS.ID exists. |
298 | * implements cr4 but this AP does not -- very unlikely | ||
299 | * but be warned! The same applies to the pse feature | ||
300 | * if not equally supported. --macro | ||
301 | * | ||
302 | * NOTE! We have to correct for the fact that we're | ||
303 | * not yet offset PAGE_OFFSET.. | ||
304 | */ | 297 | */ |
305 | #define cr4_bits pa(mmu_cr4_features) | 298 | movl $X86_EFLAGS_ID,%ecx |
306 | movl cr4_bits,%edx | 299 | pushl %ecx |
307 | andl %edx,%edx | 300 | popfl |
308 | jz 6f | 301 | pushfl |
309 | movl %cr4,%eax # Turn on paging options (PSE,PAE,..) | 302 | popl %eax |
310 | orl %edx,%eax | 303 | pushl $0 |
304 | popfl | ||
305 | pushfl | ||
306 | popl %edx | ||
307 | xorl %edx,%eax | ||
308 | testl %ecx,%eax | ||
309 | jz 6f # No ID flag = no CPUID = no CR4 | ||
310 | |||
311 | movl pa(mmu_cr4_features),%eax | ||
311 | movl %eax,%cr4 | 312 | movl %eax,%cr4 |
312 | 313 | ||
313 | testb $X86_CR4_PAE, %al # check if PAE is enabled | 314 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
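The rewritten default_entry no longer asks "are there any cr4 bits to set?"; it probes the CPU directly: EFLAGS.ID (bit 21) can only be toggled on CPUs that implement CPUID, and, per the new comment, cr4 exists if and only if CPUID does. The same probe expressed in C (illustrative only; the real check runs in assembly long before C code is available, and the 32-bit pushfl/popfl forms are assumed):

#define EXAMPLE_EFLAGS_ID	(1UL << 21)	/* X86_EFLAGS_ID */

static int example_has_cpuid(void)
{
	unsigned long flipped, original;

	asm volatile("pushfl\n\t"	/* save caller's EFLAGS            */
		     "pushfl\n\t"
		     "popl %0\n\t"	/* original flags -> %0            */
		     "movl %0, %1\n\t"
		     "xorl %2, %0\n\t"	/* toggle the ID bit               */
		     "pushl %0\n\t"
		     "popfl\n\t"
		     "pushfl\n\t"
		     "popl %0\n\t"	/* read back: did the bit stick?   */
		     "popfl"		/* restore caller's EFLAGS         */
		     : "=&r" (flipped), "=&r" (original)
		     : "ir" (EXAMPLE_EFLAGS_ID));

	return ((flipped ^ original) & EXAMPLE_EFLAGS_ID) != 0;
}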
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431fb505..675a05012449 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -19,24 +19,17 @@ | |||
19 | #include <asm/fpu-internal.h> | 19 | #include <asm/fpu-internal.h> |
20 | #include <asm/user.h> | 20 | #include <asm/user.h> |
21 | 21 | ||
22 | #ifdef CONFIG_X86_64 | ||
23 | # include <asm/sigcontext32.h> | ||
24 | # include <asm/user32.h> | ||
25 | #else | ||
26 | # define save_i387_xstate_ia32 save_i387_xstate | ||
27 | # define restore_i387_xstate_ia32 restore_i387_xstate | ||
28 | # define _fpstate_ia32 _fpstate | ||
29 | # define _xstate_ia32 _xstate | ||
30 | # define sig_xstate_ia32_size sig_xstate_size | ||
31 | # define fx_sw_reserved_ia32 fx_sw_reserved | ||
32 | # define user_i387_ia32_struct user_i387_struct | ||
33 | # define user32_fxsr_struct user_fxsr_struct | ||
34 | #endif | ||
35 | |||
36 | /* | 22 | /* |
37 | * Were we in an interrupt that interrupted kernel mode? | 23 | * Were we in an interrupt that interrupted kernel mode? |
38 | * | 24 | * |
39 | * We can do a kernel_fpu_begin/end() pair *ONLY* if that | 25 | * For now, with eagerfpu we will return interrupted kernel FPU |
26 | * state as not-idle. TBD: Ideally we can change the return value | ||
27 | * to something like __thread_has_fpu(current). But we need to | ||
28 | * be careful of doing __thread_clear_has_fpu() before saving | ||
29 | * the FPU etc for supporting nested uses etc. For now, take | ||
30 | * the simple route! | ||
31 | * | ||
32 | * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
40 | * pair does nothing at all: the thread must not have fpu (so | 33 | * pair does nothing at all: the thread must not have fpu (so |
41 | * that we don't try to save the FPU state), and TS must | 34 | * that we don't try to save the FPU state), and TS must |
42 | * be set (so that the clts/stts pair does nothing that is | 35 | * be set (so that the clts/stts pair does nothing that is |
@@ -44,6 +37,9 @@ | |||
44 | */ | 37 | */ |
45 | static inline bool interrupted_kernel_fpu_idle(void) | 38 | static inline bool interrupted_kernel_fpu_idle(void) |
46 | { | 39 | { |
40 | if (use_eager_fpu()) | ||
41 | return 0; | ||
42 | |||
47 | return !__thread_has_fpu(current) && | 43 | return !__thread_has_fpu(current) && |
48 | (read_cr0() & X86_CR0_TS); | 44 | (read_cr0() & X86_CR0_TS); |
49 | } | 45 | } |
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void) | |||
77 | } | 73 | } |
78 | EXPORT_SYMBOL(irq_fpu_usable); | 74 | EXPORT_SYMBOL(irq_fpu_usable); |
79 | 75 | ||
80 | void kernel_fpu_begin(void) | 76 | void __kernel_fpu_begin(void) |
81 | { | 77 | { |
82 | struct task_struct *me = current; | 78 | struct task_struct *me = current; |
83 | 79 | ||
84 | WARN_ON_ONCE(!irq_fpu_usable()); | ||
85 | preempt_disable(); | ||
86 | if (__thread_has_fpu(me)) { | 80 | if (__thread_has_fpu(me)) { |
87 | __save_init_fpu(me); | 81 | __save_init_fpu(me); |
88 | __thread_clear_has_fpu(me); | 82 | __thread_clear_has_fpu(me); |
89 | /* We do 'stts()' in kernel_fpu_end() */ | 83 | /* We do 'stts()' in __kernel_fpu_end() */ |
90 | } else { | 84 | } else if (!use_eager_fpu()) { |
91 | this_cpu_write(fpu_owner_task, NULL); | 85 | this_cpu_write(fpu_owner_task, NULL); |
92 | clts(); | 86 | clts(); |
93 | } | 87 | } |
94 | } | 88 | } |
95 | EXPORT_SYMBOL(kernel_fpu_begin); | 89 | EXPORT_SYMBOL(__kernel_fpu_begin); |
96 | 90 | ||
97 | void kernel_fpu_end(void) | 91 | void __kernel_fpu_end(void) |
98 | { | 92 | { |
99 | stts(); | 93 | if (use_eager_fpu()) |
100 | preempt_enable(); | 94 | math_state_restore(); |
95 | else | ||
96 | stts(); | ||
101 | } | 97 | } |
102 | EXPORT_SYMBOL(kernel_fpu_end); | 98 | EXPORT_SYMBOL(__kernel_fpu_end); |
103 | 99 | ||
104 | void unlazy_fpu(struct task_struct *tsk) | 100 | void unlazy_fpu(struct task_struct *tsk) |
105 | { | 101 | { |
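With this hunk kernel_fpu_begin()/kernel_fpu_end() become __kernel_fpu_begin()/__kernel_fpu_end(): the double-underscore versions do only the FPU state handling (and know about eager FPU), while preemption control and the irq_fpu_usable() sanity check are left to thin wrappers at the call sites. Those wrappers are not part of this diff, so the sketch below is an assumption about their shape rather than a quote of the real header:

#include <linux/bug.h>
#include <linux/preempt.h>

extern void __kernel_fpu_begin(void);
extern void __kernel_fpu_end(void);
extern bool irq_fpu_usable(void);

static inline void example_kernel_fpu_begin(void)
{
	WARN_ON_ONCE(!irq_fpu_usable());	/* context must allow FPU use */
	preempt_disable();
	__kernel_fpu_begin();
}

static inline void example_kernel_fpu_end(void)
{
	__kernel_fpu_end();
	preempt_enable();
}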
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) | |||
113 | } | 109 | } |
114 | EXPORT_SYMBOL(unlazy_fpu); | 110 | EXPORT_SYMBOL(unlazy_fpu); |
115 | 111 | ||
116 | #ifdef CONFIG_MATH_EMULATION | 112 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
117 | # define HAVE_HWFP (boot_cpu_data.hard_math) | ||
118 | #else | ||
119 | # define HAVE_HWFP 1 | ||
120 | #endif | ||
121 | |||
122 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | ||
123 | unsigned int xstate_size; | 113 | unsigned int xstate_size; |
124 | EXPORT_SYMBOL_GPL(xstate_size); | 114 | EXPORT_SYMBOL_GPL(xstate_size); |
125 | unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); | ||
126 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; | 115 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; |
127 | 116 | ||
128 | static void __cpuinit mxcsr_feature_mask_init(void) | 117 | static void __cpuinit mxcsr_feature_mask_init(void) |
129 | { | 118 | { |
130 | unsigned long mask = 0; | 119 | unsigned long mask = 0; |
131 | 120 | ||
132 | clts(); | ||
133 | if (cpu_has_fxsr) { | 121 | if (cpu_has_fxsr) { |
134 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | 122 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); |
135 | asm volatile("fxsave %0" : : "m" (fx_scratch)); | 123 | asm volatile("fxsave %0" : : "m" (fx_scratch)); |
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) | |||
138 | mask = 0x0000ffbf; | 126 | mask = 0x0000ffbf; |
139 | } | 127 | } |
140 | mxcsr_feature_mask &= mask; | 128 | mxcsr_feature_mask &= mask; |
141 | stts(); | ||
142 | } | 129 | } |
143 | 130 | ||
144 | static void __cpuinit init_thread_xstate(void) | 131 | static void __cpuinit init_thread_xstate(void) |
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) | |||
192 | init_thread_xstate(); | 179 | init_thread_xstate(); |
193 | 180 | ||
194 | mxcsr_feature_mask_init(); | 181 | mxcsr_feature_mask_init(); |
195 | /* clean state in init */ | 182 | xsave_init(); |
196 | current_thread_info()->status = 0; | 183 | eager_fpu_init(); |
197 | clear_used_math(); | ||
198 | } | 184 | } |
199 | 185 | ||
200 | void fpu_finit(struct fpu *fpu) | 186 | void fpu_finit(struct fpu *fpu) |
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) | |||
205 | } | 191 | } |
206 | 192 | ||
207 | if (cpu_has_fxsr) { | 193 | if (cpu_has_fxsr) { |
208 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | 194 | fx_finit(&fpu->state->fxsave); |
209 | |||
210 | memset(fx, 0, xstate_size); | ||
211 | fx->cwd = 0x37f; | ||
212 | if (cpu_has_xmm) | ||
213 | fx->mxcsr = MXCSR_DEFAULT; | ||
214 | } else { | 195 | } else { |
215 | struct i387_fsave_struct *fp = &fpu->state->fsave; | 196 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
216 | memset(fp, 0, xstate_size); | 197 | memset(fp, 0, xstate_size); |
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
454 | * FXSR floating point environment conversions. | 435 | * FXSR floating point environment conversions. |
455 | */ | 436 | */ |
456 | 437 | ||
457 | static void | 438 | void |
458 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 439 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
459 | { | 440 | { |
460 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | 441 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | |||
491 | memcpy(&to[i], &from[i], sizeof(to[0])); | 472 | memcpy(&to[i], &from[i], sizeof(to[0])); |
492 | } | 473 | } |
493 | 474 | ||
494 | static void convert_to_fxsr(struct task_struct *tsk, | 475 | void convert_to_fxsr(struct task_struct *tsk, |
495 | const struct user_i387_ia32_struct *env) | 476 | const struct user_i387_ia32_struct *env) |
496 | 477 | ||
497 | { | 478 | { |
498 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | 479 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
@@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
589 | } | 570 | } |
590 | 571 | ||
591 | /* | 572 | /* |
592 | * Signal frame handlers. | ||
593 | */ | ||
594 | |||
595 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
596 | { | ||
597 | struct task_struct *tsk = current; | ||
598 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; | ||
599 | |||
600 | fp->status = fp->swd; | ||
601 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | ||
602 | return -1; | ||
603 | return 1; | ||
604 | } | ||
605 | |||
606 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | ||
607 | { | ||
608 | struct task_struct *tsk = current; | ||
609 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | ||
610 | struct user_i387_ia32_struct env; | ||
611 | int err = 0; | ||
612 | |||
613 | convert_from_fxsr(&env, tsk); | ||
614 | if (__copy_to_user(buf, &env, sizeof(env))) | ||
615 | return -1; | ||
616 | |||
617 | err |= __put_user(fx->swd, &buf->status); | ||
618 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); | ||
619 | if (err) | ||
620 | return -1; | ||
621 | |||
622 | if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) | ||
623 | return -1; | ||
624 | return 1; | ||
625 | } | ||
626 | |||
627 | static int save_i387_xsave(void __user *buf) | ||
628 | { | ||
629 | struct task_struct *tsk = current; | ||
630 | struct _fpstate_ia32 __user *fx = buf; | ||
631 | int err = 0; | ||
632 | |||
633 | |||
634 | sanitize_i387_state(tsk); | ||
635 | |||
636 | /* | ||
637 | * For legacy compatible, we always set FP/SSE bits in the bit | ||
638 | * vector while saving the state to the user context. | ||
639 | * This will enable us capturing any changes(during sigreturn) to | ||
640 | * the FP/SSE bits by the legacy applications which don't touch | ||
641 | * xstate_bv in the xsave header. | ||
642 | * | ||
643 | * xsave aware applications can change the xstate_bv in the xsave | ||
644 | * header as well as change any contents in the memory layout. | ||
645 | * xrestore as part of sigreturn will capture all the changes. | ||
646 | */ | ||
647 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | ||
648 | |||
649 | if (save_i387_fxsave(fx) < 0) | ||
650 | return -1; | ||
651 | |||
652 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, | ||
653 | sizeof(struct _fpx_sw_bytes)); | ||
654 | err |= __put_user(FP_XSTATE_MAGIC2, | ||
655 | (__u32 __user *) (buf + sig_xstate_ia32_size | ||
656 | - FP_XSTATE_MAGIC2_SIZE)); | ||
657 | if (err) | ||
658 | return -1; | ||
659 | |||
660 | return 1; | ||
661 | } | ||
662 | |||
663 | int save_i387_xstate_ia32(void __user *buf) | ||
664 | { | ||
665 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
666 | struct task_struct *tsk = current; | ||
667 | |||
668 | if (!used_math()) | ||
669 | return 0; | ||
670 | |||
671 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) | ||
672 | return -EACCES; | ||
673 | /* | ||
674 | * This will cause a "finit" to be triggered by the next | ||
675 | * attempted FPU operation by the 'current' process. | ||
676 | */ | ||
677 | clear_used_math(); | ||
678 | |||
679 | if (!HAVE_HWFP) { | ||
680 | return fpregs_soft_get(current, NULL, | ||
681 | 0, sizeof(struct user_i387_ia32_struct), | ||
682 | NULL, fp) ? -1 : 1; | ||
683 | } | ||
684 | |||
685 | unlazy_fpu(tsk); | ||
686 | |||
687 | if (cpu_has_xsave) | ||
688 | return save_i387_xsave(fp); | ||
689 | if (cpu_has_fxsr) | ||
690 | return save_i387_fxsave(fp); | ||
691 | else | ||
692 | return save_i387_fsave(fp); | ||
693 | } | ||
694 | |||
695 | static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
696 | { | ||
697 | struct task_struct *tsk = current; | ||
698 | |||
699 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, | ||
700 | sizeof(struct i387_fsave_struct)); | ||
701 | } | ||
702 | |||
703 | static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | ||
704 | unsigned int size) | ||
705 | { | ||
706 | struct task_struct *tsk = current; | ||
707 | struct user_i387_ia32_struct env; | ||
708 | int err; | ||
709 | |||
710 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], | ||
711 | size); | ||
712 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
713 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; | ||
714 | if (err || __copy_from_user(&env, buf, sizeof(env))) | ||
715 | return 1; | ||
716 | convert_to_fxsr(tsk, &env); | ||
717 | |||
718 | return 0; | ||
719 | } | ||
720 | |||
721 | static int restore_i387_xsave(void __user *buf) | ||
722 | { | ||
723 | struct _fpx_sw_bytes fx_sw_user; | ||
724 | struct _fpstate_ia32 __user *fx_user = | ||
725 | ((struct _fpstate_ia32 __user *) buf); | ||
726 | struct i387_fxsave_struct __user *fx = | ||
727 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | ||
728 | struct xsave_hdr_struct *xsave_hdr = | ||
729 | &current->thread.fpu.state->xsave.xsave_hdr; | ||
730 | u64 mask; | ||
731 | int err; | ||
732 | |||
733 | if (check_for_xstate(fx, buf, &fx_sw_user)) | ||
734 | goto fx_only; | ||
735 | |||
736 | mask = fx_sw_user.xstate_bv; | ||
737 | |||
738 | err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); | ||
739 | |||
740 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
741 | /* | ||
742 | * These bits must be zero. | ||
743 | */ | ||
744 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
745 | |||
746 | /* | ||
747 | * Init the state that is not present in the memory layout | ||
748 | * and enabled by the OS. | ||
749 | */ | ||
750 | mask = ~(pcntxt_mask & ~mask); | ||
751 | xsave_hdr->xstate_bv &= mask; | ||
752 | |||
753 | return err; | ||
754 | fx_only: | ||
755 | /* | ||
756 | * Couldn't find the extended state information in the memory | ||
757 | * layout. Restore the FP/SSE and init the other extended state | ||
758 | * enabled by the OS. | ||
759 | */ | ||
760 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
761 | return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); | ||
762 | } | ||
763 | |||
764 | int restore_i387_xstate_ia32(void __user *buf) | ||
765 | { | ||
766 | int err; | ||
767 | struct task_struct *tsk = current; | ||
768 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
769 | |||
770 | if (HAVE_HWFP) | ||
771 | clear_fpu(tsk); | ||
772 | |||
773 | if (!buf) { | ||
774 | if (used_math()) { | ||
775 | clear_fpu(tsk); | ||
776 | clear_used_math(); | ||
777 | } | ||
778 | |||
779 | return 0; | ||
780 | } else | ||
781 | if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) | ||
782 | return -EACCES; | ||
783 | |||
784 | if (!used_math()) { | ||
785 | err = init_fpu(tsk); | ||
786 | if (err) | ||
787 | return err; | ||
788 | } | ||
789 | |||
790 | if (HAVE_HWFP) { | ||
791 | if (cpu_has_xsave) | ||
792 | err = restore_i387_xsave(buf); | ||
793 | else if (cpu_has_fxsr) | ||
794 | err = restore_i387_fxsave(fp, sizeof(struct | ||
795 | i387_fxsave_struct)); | ||
796 | else | ||
797 | err = restore_i387_fsave(fp); | ||
798 | } else { | ||
799 | err = fpregs_soft_set(current, NULL, | ||
800 | 0, sizeof(struct user_i387_ia32_struct), | ||
801 | NULL, fp) != 0; | ||
802 | } | ||
803 | set_used_math(); | ||
804 | |||
805 | return err; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * FPU state for core dumps. | 573 | * FPU state for core dumps. |
810 | * This is only used for a.out dumps now. | 574 | * This is only used for a.out dumps now. |
811 | * It is declared generically using elf_fpregset_t (which is | 575 | * It is declared generically using elf_fpregset_t (which is |
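The restore_i387_xsave() hunk above keeps a bit in xstate_bv only when the OS enables that component (pcntxt_mask) and the user's signal frame actually supplied it (fx_sw_user.xstate_bv); anything enabled but not supplied is cleared so the hardware restore reinitialises it. A minimal userspace sketch of that bit arithmetic, not taken from the patch itself — the XSTATE_* values below are illustrative assumptions, only the masking mirrors the hunk:

#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP    0x1ULL
#define XSTATE_SSE   0x2ULL
#define XSTATE_YMM   0x4ULL
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)

int main(void)
{
    uint64_t pcntxt_mask = XSTATE_FP | XSTATE_SSE | XSTATE_YMM; /* enabled by the OS */
    uint64_t user_bv     = XSTATE_FPSSE;                        /* present in the signal frame */
    uint64_t xstate_bv   = pcntxt_mask;                         /* header value after the restore */

    xstate_bv &= pcntxt_mask;               /* never claim state the OS doesn't enable */
    xstate_bv &= ~(pcntxt_mask & ~user_bv); /* same effect as: mask = ~(pcntxt_mask & ~mask) */

    /* prints 0x3 -- the YMM bit is cleared, so that component gets reinitialised */
    printf("xstate_bv = %#llx\n", (unsigned long long)xstate_bv);
    return 0;
}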
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91af..9a5c460404dc 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void) | |||
263 | * out of. | 263 | * out of. |
264 | */ | 264 | */ |
265 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 265 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
266 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ | 266 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
267 | } | 267 | } |
268 | 268 | ||
269 | static struct syscore_ops i8259_syscore_ops = { | 269 | static struct syscore_ops i8259_syscore_ops = { |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d44f7829968e..e4595f105910 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, " Rescheduling interrupts\n"); | 92 | seq_printf(p, " Rescheduling interrupts\n"); |
93 | seq_printf(p, "%*s: ", prec, "CAL"); | 93 | seq_printf(p, "%*s: ", prec, "CAL"); |
94 | for_each_online_cpu(j) | 94 | for_each_online_cpu(j) |
95 | seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); | 95 | seq_printf(p, "%10u ", irq_stats(j)->irq_call_count - |
96 | irq_stats(j)->irq_tlb_count); | ||
96 | seq_printf(p, " Function call interrupts\n"); | 97 | seq_printf(p, " Function call interrupts\n"); |
97 | seq_printf(p, "%*s: ", prec, "TLB"); | 98 | seq_printf(p, "%*s: ", prec, "TLB"); |
98 | for_each_online_cpu(j) | 99 | for_each_online_cpu(j) |
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
147 | #ifdef CONFIG_SMP | 148 | #ifdef CONFIG_SMP |
148 | sum += irq_stats(cpu)->irq_resched_count; | 149 | sum += irq_stats(cpu)->irq_resched_count; |
149 | sum += irq_stats(cpu)->irq_call_count; | 150 | sum += irq_stats(cpu)->irq_call_count; |
150 | sum += irq_stats(cpu)->irq_tlb_count; | ||
151 | #endif | 151 | #endif |
152 | #ifdef CONFIG_X86_THERMAL_VECTOR | 152 | #ifdef CONFIG_X86_THERMAL_VECTOR |
153 | sum += irq_stats(cpu)->irq_thermal_count; | 153 | sum += irq_stats(cpu)->irq_thermal_count; |
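The irq.c hunks change the /proc/interrupts accounting: the CAL row now prints irq_call_count minus irq_tlb_count, and irq_tlb_count is dropped from arch_irq_stat_cpu(), which is consistent with TLB shootdowns being delivered through the function-call vector and therefore already included in irq_call_count. A toy sketch of the resulting split, with made-up counter values (the struct and numbers are assumptions, only the subtraction mirrors the hunk):

#include <stdio.h>

struct toy_irq_stats {
    unsigned int irq_call_count; /* all function-call IPIs, including TLB shootdowns */
    unsigned int irq_tlb_count;  /* TLB shootdowns only */
};

int main(void)
{
    struct toy_irq_stats s = { .irq_call_count = 120, .irq_tlb_count = 45 };

    /* what the patched arch_show_interrupts() would report */
    printf("CAL: %10u\n", s.irq_call_count - s.irq_tlb_count); /* 75 genuine function calls */
    printf("TLB: %10u\n", s.irq_tlb_count);                    /* 45 shootdowns */
    return 0;
}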
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b1..57916c0d3cf6 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb | |||
541 | return 1; | 541 | return 1; |
542 | } | 542 | } |
543 | 543 | ||
544 | #ifdef KPROBES_CAN_USE_FTRACE | ||
545 | static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
546 | struct kprobe_ctlblk *kcb) | ||
547 | { | ||
548 | /* | ||
549 | * Emulate singlestep (and also recover regs->ip) | ||
550 | * as if there is a 5byte nop | ||
551 | */ | ||
552 | regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; | ||
553 | if (unlikely(p->post_handler)) { | ||
554 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
555 | p->post_handler(p, regs, 0); | ||
556 | } | ||
557 | __this_cpu_write(current_kprobe, NULL); | ||
558 | } | ||
559 | #endif | ||
560 | |||
544 | /* | 561 | /* |
545 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | 562 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they |
546 | * remain disabled throughout this function. | 563 | * remain disabled throughout this function. |
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
599 | } else if (kprobe_running()) { | 616 | } else if (kprobe_running()) { |
600 | p = __this_cpu_read(current_kprobe); | 617 | p = __this_cpu_read(current_kprobe); |
601 | if (p->break_handler && p->break_handler(p, regs)) { | 618 | if (p->break_handler && p->break_handler(p, regs)) { |
619 | #ifdef KPROBES_CAN_USE_FTRACE | ||
620 | if (kprobe_ftrace(p)) { | ||
621 | skip_singlestep(p, regs, kcb); | ||
622 | return 1; | ||
623 | } | ||
624 | #endif | ||
602 | setup_singlestep(p, regs, kcb, 0); | 625 | setup_singlestep(p, regs, kcb, 0); |
603 | return 1; | 626 | return 1; |
604 | } | 627 | } |
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1052 | return 0; | 1075 | return 0; |
1053 | } | 1076 | } |
1054 | 1077 | ||
1078 | #ifdef KPROBES_CAN_USE_FTRACE | ||
1079 | /* Ftrace callback handler for kprobes */ | ||
1080 | void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, | ||
1081 | struct ftrace_ops *ops, struct pt_regs *regs) | ||
1082 | { | ||
1083 | struct kprobe *p; | ||
1084 | struct kprobe_ctlblk *kcb; | ||
1085 | unsigned long flags; | ||
1086 | |||
1087 | /* Disable irq for emulating a breakpoint and avoiding preempt */ | ||
1088 | local_irq_save(flags); | ||
1089 | |||
1090 | p = get_kprobe((kprobe_opcode_t *)ip); | ||
1091 | if (unlikely(!p) || kprobe_disabled(p)) | ||
1092 | goto end; | ||
1093 | |||
1094 | kcb = get_kprobe_ctlblk(); | ||
1095 | if (kprobe_running()) { | ||
1096 | kprobes_inc_nmissed_count(p); | ||
1097 | } else { | ||
1098 | /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ | ||
1099 | regs->ip = ip + sizeof(kprobe_opcode_t); | ||
1100 | |||
1101 | __this_cpu_write(current_kprobe, p); | ||
1102 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1103 | if (!p->pre_handler || !p->pre_handler(p, regs)) | ||
1104 | skip_singlestep(p, regs, kcb); | ||
1105 | /* | ||
1106 | * If pre_handler returns !0, it sets regs->ip and | ||
1107 | * resets current kprobe. | ||
1108 | */ | ||
1109 | } | ||
1110 | end: | ||
1111 | local_irq_restore(flags); | ||
1112 | } | ||
1113 | |||
1114 | int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) | ||
1115 | { | ||
1116 | p->ainsn.insn = NULL; | ||
1117 | p->ainsn.boostable = -1; | ||
1118 | return 0; | ||
1119 | } | ||
1120 | #endif | ||
1121 | |||
1055 | int __init arch_init_kprobes(void) | 1122 | int __init arch_init_kprobes(void) |
1056 | { | 1123 | { |
1057 | return arch_init_optprobes(); | 1124 | return arch_init_optprobes(); |
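The new kprobe_ftrace_handler()/skip_singlestep() pair lets a probe sitting on the ftrace call site run without int3 or single-stepping: the handler first presents regs->ip as if a breakpoint had fired (one opcode byte past the probe address), and skip_singlestep() then resumes as if a 5-byte nop occupied the call. A standalone sketch of just those two ip adjustments; MCOUNT_INSN_SIZE = 5 and the 1-byte opcode size are assumptions based on the x86 values shown in the hunk:

#include <stdio.h>

#define MCOUNT_INSN_SIZE   5 /* size of the call to the ftrace trampoline */
#define KPROBE_OPCODE_SIZE 1 /* size of the int3 opcode the handlers expect */

struct toy_regs { unsigned long ip; };

/* what kprobe_ftrace_handler() does before invoking the pre_handler */
static void toy_enter_probe(struct toy_regs *regs, unsigned long probe_addr)
{
    regs->ip = probe_addr + KPROBE_OPCODE_SIZE;
}

/* what skip_singlestep() does to resume past the probed call site */
static void toy_skip_singlestep(struct toy_regs *regs, unsigned long probe_addr)
{
    regs->ip = probe_addr + MCOUNT_INSN_SIZE;
}

int main(void)
{
    struct toy_regs regs = { 0 };
    unsigned long addr = 0x1000;

    toy_enter_probe(&regs, addr);
    printf("ip seen by the handler: %#lx\n", regs.ip); /* 0x1001 */
    toy_skip_singlestep(&regs, addr);
    printf("ip after skipping:      %#lx\n", regs.ip); /* 0x1005 */
    return 0;
}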
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 82746f942cd8..7720ff5a9ee2 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -75,20 +75,113 @@ struct microcode_amd { | |||
75 | 75 | ||
76 | static struct equiv_cpu_entry *equiv_cpu_table; | 76 | static struct equiv_cpu_entry *equiv_cpu_table; |
77 | 77 | ||
78 | /* page-sized ucode patch buffer */ | 78 | struct ucode_patch { |
79 | void *patch; | 79 | struct list_head plist; |
80 | void *data; | ||
81 | u32 patch_id; | ||
82 | u16 equiv_cpu; | ||
83 | }; | ||
84 | |||
85 | static LIST_HEAD(pcache); | ||
86 | |||
87 | static u16 find_equiv_id(unsigned int cpu) | ||
88 | { | ||
89 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
90 | int i = 0; | ||
91 | |||
92 | if (!equiv_cpu_table) | ||
93 | return 0; | ||
94 | |||
95 | while (equiv_cpu_table[i].installed_cpu != 0) { | ||
96 | if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) | ||
97 | return equiv_cpu_table[i].equiv_cpu; | ||
98 | |||
99 | i++; | ||
100 | } | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) | ||
105 | { | ||
106 | int i = 0; | ||
107 | |||
108 | BUG_ON(!equiv_cpu_table); | ||
109 | |||
110 | while (equiv_cpu_table[i].equiv_cpu != 0) { | ||
111 | if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) | ||
112 | return equiv_cpu_table[i].installed_cpu; | ||
113 | i++; | ||
114 | } | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * a small, trivial cache of per-family ucode patches | ||
120 | */ | ||
121 | static struct ucode_patch *cache_find_patch(u16 equiv_cpu) | ||
122 | { | ||
123 | struct ucode_patch *p; | ||
124 | |||
125 | list_for_each_entry(p, &pcache, plist) | ||
126 | if (p->equiv_cpu == equiv_cpu) | ||
127 | return p; | ||
128 | return NULL; | ||
129 | } | ||
130 | |||
131 | static void update_cache(struct ucode_patch *new_patch) | ||
132 | { | ||
133 | struct ucode_patch *p; | ||
134 | |||
135 | list_for_each_entry(p, &pcache, plist) { | ||
136 | if (p->equiv_cpu == new_patch->equiv_cpu) { | ||
137 | if (p->patch_id >= new_patch->patch_id) | ||
138 | /* we already have the latest patch */ | ||
139 | return; | ||
140 | |||
141 | list_replace(&p->plist, &new_patch->plist); | ||
142 | kfree(p->data); | ||
143 | kfree(p); | ||
144 | return; | ||
145 | } | ||
146 | } | ||
147 | /* no patch found, add it */ | ||
148 | list_add_tail(&new_patch->plist, &pcache); | ||
149 | } | ||
150 | |||
151 | static void free_cache(void) | ||
152 | { | ||
153 | struct ucode_patch *p, *tmp; | ||
154 | |||
155 | list_for_each_entry_safe(p, tmp, &pcache, plist) { | ||
156 | __list_del(p->plist.prev, p->plist.next); | ||
157 | kfree(p->data); | ||
158 | kfree(p); | ||
159 | } | ||
160 | } | ||
161 | |||
162 | static struct ucode_patch *find_patch(unsigned int cpu) | ||
163 | { | ||
164 | u16 equiv_id; | ||
165 | |||
166 | equiv_id = find_equiv_id(cpu); | ||
167 | if (!equiv_id) | ||
168 | return NULL; | ||
169 | |||
170 | return cache_find_patch(equiv_id); | ||
171 | } | ||
80 | 172 | ||
81 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 173 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
82 | { | 174 | { |
83 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 175 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
84 | 176 | ||
177 | csig->sig = cpuid_eax(0x00000001); | ||
85 | csig->rev = c->microcode; | 178 | csig->rev = c->microcode; |
86 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); | 179 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); |
87 | 180 | ||
88 | return 0; | 181 | return 0; |
89 | } | 182 | } |
90 | 183 | ||
91 | static unsigned int verify_ucode_size(int cpu, u32 patch_size, | 184 | static unsigned int verify_patch_size(int cpu, u32 patch_size, |
92 | unsigned int size) | 185 | unsigned int size) |
93 | { | 186 | { |
94 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 187 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, | |||
118 | return patch_size; | 211 | return patch_size; |
119 | } | 212 | } |
120 | 213 | ||
121 | static u16 find_equiv_id(void) | 214 | static int apply_microcode_amd(int cpu) |
122 | { | 215 | { |
123 | unsigned int current_cpu_id, i = 0; | 216 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
124 | 217 | struct microcode_amd *mc_amd; | |
125 | BUG_ON(equiv_cpu_table == NULL); | 218 | struct ucode_cpu_info *uci; |
126 | 219 | struct ucode_patch *p; | |
127 | current_cpu_id = cpuid_eax(0x00000001); | 220 | u32 rev, dummy; |
128 | |||
129 | while (equiv_cpu_table[i].installed_cpu != 0) { | ||
130 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) | ||
131 | return equiv_cpu_table[i].equiv_cpu; | ||
132 | |||
133 | i++; | ||
134 | } | ||
135 | return 0; | ||
136 | } | ||
137 | 221 | ||
138 | /* | 222 | BUG_ON(raw_smp_processor_id() != cpu); |
139 | * we signal a good patch is found by returning its size > 0 | ||
140 | */ | ||
141 | static int get_matching_microcode(int cpu, const u8 *ucode_ptr, | ||
142 | unsigned int leftover_size, int rev, | ||
143 | unsigned int *current_size) | ||
144 | { | ||
145 | struct microcode_header_amd *mc_hdr; | ||
146 | unsigned int actual_size, patch_size; | ||
147 | u16 equiv_cpu_id; | ||
148 | 223 | ||
149 | /* size of the current patch we're staring at */ | 224 | uci = ucode_cpu_info + cpu; |
150 | patch_size = *(u32 *)(ucode_ptr + 4); | ||
151 | *current_size = patch_size + SECTION_HDR_SIZE; | ||
152 | 225 | ||
153 | equiv_cpu_id = find_equiv_id(); | 226 | p = find_patch(cpu); |
154 | if (!equiv_cpu_id) | 227 | if (!p) |
155 | return 0; | 228 | return 0; |
156 | 229 | ||
157 | /* | 230 | mc_amd = p->data; |
158 | * let's look at the patch header itself now | 231 | uci->mc = p->data; |
159 | */ | ||
160 | mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); | ||
161 | 232 | ||
162 | if (mc_hdr->processor_rev_id != equiv_cpu_id) | 233 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
163 | return 0; | ||
164 | 234 | ||
165 | /* ucode might be chipset specific -- currently we don't support this */ | 235 | /* need to apply patch? */ |
166 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { | 236 | if (rev >= mc_amd->hdr.patch_id) { |
167 | pr_err("CPU%d: chipset specific code not yet supported\n", | 237 | c->microcode = rev; |
168 | cpu); | ||
169 | return 0; | 238 | return 0; |
170 | } | 239 | } |
171 | 240 | ||
172 | if (mc_hdr->patch_id <= rev) | ||
173 | return 0; | ||
174 | |||
175 | /* | ||
176 | * now that the header looks sane, verify its size | ||
177 | */ | ||
178 | actual_size = verify_ucode_size(cpu, patch_size, leftover_size); | ||
179 | if (!actual_size) | ||
180 | return 0; | ||
181 | |||
182 | /* clear the patch buffer */ | ||
183 | memset(patch, 0, PAGE_SIZE); | ||
184 | |||
185 | /* all looks ok, get the binary patch */ | ||
186 | get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); | ||
187 | |||
188 | return actual_size; | ||
189 | } | ||
190 | |||
191 | static int apply_microcode_amd(int cpu) | ||
192 | { | ||
193 | u32 rev, dummy; | ||
194 | int cpu_num = raw_smp_processor_id(); | ||
195 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
196 | struct microcode_amd *mc_amd = uci->mc; | ||
197 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
198 | |||
199 | /* We should bind the task to the CPU */ | ||
200 | BUG_ON(cpu_num != cpu); | ||
201 | |||
202 | if (mc_amd == NULL) | ||
203 | return 0; | ||
204 | |||
205 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | 241 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); |
206 | /* get patch id after patching */ | ||
207 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | ||
208 | 242 | ||
209 | /* check current patch id and patch's id for match */ | 243 | /* verify patch application was successful */ |
244 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | ||
210 | if (rev != mc_amd->hdr.patch_id) { | 245 | if (rev != mc_amd->hdr.patch_id) { |
211 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", | 246 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", |
212 | cpu, mc_amd->hdr.patch_id); | 247 | cpu, mc_amd->hdr.patch_id); |
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
238 | return -ENOMEM; | 273 | return -ENOMEM; |
239 | } | 274 | } |
240 | 275 | ||
241 | get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); | 276 | memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); |
242 | 277 | ||
243 | /* add header length */ | 278 | /* add header length */ |
244 | return size + CONTAINER_HDR_SZ; | 279 | return size + CONTAINER_HDR_SZ; |
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void) | |||
250 | equiv_cpu_table = NULL; | 285 | equiv_cpu_table = NULL; |
251 | } | 286 | } |
252 | 287 | ||
253 | static enum ucode_state | 288 | static void cleanup(void) |
254 | generic_load_microcode(int cpu, const u8 *data, size_t size) | ||
255 | { | 289 | { |
256 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 290 | free_equiv_cpu_table(); |
257 | struct microcode_header_amd *mc_hdr = NULL; | 291 | free_cache(); |
258 | unsigned int mc_size, leftover, current_size = 0; | 292 | } |
293 | |||
294 | /* | ||
295 | * We return the current size even if some of the checks failed so | ||
296 | * that we can skip over this patch to the next one. If we return a | ||
297 | * negative value, we signal a grave error such as a failed memory | ||
298 | * allocation, and the driver cannot continue functioning normally. | ||
299 | * In such cases, we tear down everything we've used up so far and exit. | ||
300 | */ | ||
301 | static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) | ||
302 | { | ||
303 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
304 | struct microcode_header_amd *mc_hdr; | ||
305 | struct ucode_patch *patch; | ||
306 | unsigned int patch_size, crnt_size, ret; | ||
307 | u32 proc_fam; | ||
308 | u16 proc_id; | ||
309 | |||
310 | patch_size = *(u32 *)(fw + 4); | ||
311 | crnt_size = patch_size + SECTION_HDR_SIZE; | ||
312 | mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); | ||
313 | proc_id = mc_hdr->processor_rev_id; | ||
314 | |||
315 | proc_fam = find_cpu_family_by_equiv_cpu(proc_id); | ||
316 | if (!proc_fam) { | ||
317 | pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); | ||
318 | return crnt_size; | ||
319 | } | ||
320 | |||
321 | /* check if patch is for the current family */ | ||
322 | proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); | ||
323 | if (proc_fam != c->x86) | ||
324 | return crnt_size; | ||
325 | |||
326 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { | ||
327 | pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", | ||
328 | mc_hdr->patch_id); | ||
329 | return crnt_size; | ||
330 | } | ||
331 | |||
332 | ret = verify_patch_size(cpu, patch_size, leftover); | ||
333 | if (!ret) { | ||
334 | pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); | ||
335 | return crnt_size; | ||
336 | } | ||
337 | |||
338 | patch = kzalloc(sizeof(*patch), GFP_KERNEL); | ||
339 | if (!patch) { | ||
340 | pr_err("Patch allocation failure.\n"); | ||
341 | return -EINVAL; | ||
342 | } | ||
343 | |||
344 | patch->data = kzalloc(patch_size, GFP_KERNEL); | ||
345 | if (!patch->data) { | ||
346 | pr_err("Patch data allocation failure.\n"); | ||
347 | kfree(patch); | ||
348 | return -EINVAL; | ||
349 | } | ||
350 | |||
351 | /* All looks ok, copy patch... */ | ||
352 | memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); | ||
353 | INIT_LIST_HEAD(&patch->plist); | ||
354 | patch->patch_id = mc_hdr->patch_id; | ||
355 | patch->equiv_cpu = proc_id; | ||
356 | |||
357 | /* ... and add to cache. */ | ||
358 | update_cache(patch); | ||
359 | |||
360 | return crnt_size; | ||
361 | } | ||
362 | |||
363 | static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) | ||
364 | { | ||
365 | enum ucode_state ret = UCODE_ERROR; | ||
366 | unsigned int leftover; | ||
367 | u8 *fw = (u8 *)data; | ||
368 | int crnt_size = 0; | ||
259 | int offset; | 369 | int offset; |
260 | const u8 *ucode_ptr = data; | ||
261 | void *new_mc = NULL; | ||
262 | unsigned int new_rev = uci->cpu_sig.rev; | ||
263 | enum ucode_state state = UCODE_ERROR; | ||
264 | 370 | ||
265 | offset = install_equiv_cpu_table(ucode_ptr); | 371 | offset = install_equiv_cpu_table(data); |
266 | if (offset < 0) { | 372 | if (offset < 0) { |
267 | pr_err("failed to create equivalent cpu table\n"); | 373 | pr_err("failed to create equivalent cpu table\n"); |
268 | goto out; | 374 | return ret; |
269 | } | 375 | } |
270 | ucode_ptr += offset; | 376 | fw += offset; |
271 | leftover = size - offset; | 377 | leftover = size - offset; |
272 | 378 | ||
273 | if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { | 379 | if (*(u32 *)fw != UCODE_UCODE_TYPE) { |
274 | pr_err("invalid type field in container file section header\n"); | 380 | pr_err("invalid type field in container file section header\n"); |
275 | goto free_table; | 381 | free_equiv_cpu_table(); |
382 | return ret; | ||
276 | } | 383 | } |
277 | 384 | ||
278 | while (leftover) { | 385 | while (leftover) { |
279 | mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, | 386 | crnt_size = verify_and_add_patch(cpu, fw, leftover); |
280 | new_rev, ¤t_size); | 387 | if (crnt_size < 0) |
281 | if (mc_size) { | 388 | return ret; |
282 | mc_hdr = patch; | ||
283 | new_mc = patch; | ||
284 | new_rev = mc_hdr->patch_id; | ||
285 | goto out_ok; | ||
286 | } | ||
287 | |||
288 | ucode_ptr += current_size; | ||
289 | leftover -= current_size; | ||
290 | } | ||
291 | 389 | ||
292 | if (!new_mc) { | 390 | fw += crnt_size; |
293 | state = UCODE_NFOUND; | 391 | leftover -= crnt_size; |
294 | goto free_table; | ||
295 | } | 392 | } |
296 | 393 | ||
297 | out_ok: | 394 | return UCODE_OK; |
298 | uci->mc = new_mc; | ||
299 | state = UCODE_OK; | ||
300 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", | ||
301 | cpu, uci->cpu_sig.rev, new_rev); | ||
302 | |||
303 | free_table: | ||
304 | free_equiv_cpu_table(); | ||
305 | |||
306 | out: | ||
307 | return state; | ||
308 | } | 395 | } |
309 | 396 | ||
310 | /* | 397 | /* |
@@ -315,7 +402,7 @@ out: | |||
315 | * | 402 | * |
316 | * This legacy file is always smaller than 2K in size. | 403 | * This legacy file is always smaller than 2K in size. |
317 | * | 404 | * |
318 | * Starting at family 15h they are in family specific firmware files: | 405 | * Beginning with family 15h, they are in family-specific firmware files: |
319 | * | 406 | * |
320 | * amd-ucode/microcode_amd_fam15h.bin | 407 | * amd-ucode/microcode_amd_fam15h.bin |
321 | * amd-ucode/microcode_amd_fam16h.bin | 408 | * amd-ucode/microcode_amd_fam16h.bin |
@@ -323,12 +410,17 @@ out: | |||
323 | * | 410 | * |
324 | * These might be larger than 2K. | 411 | * These might be larger than 2K. |
325 | */ | 412 | */ |
326 | static enum ucode_state request_microcode_amd(int cpu, struct device *device) | 413 | static enum ucode_state request_microcode_amd(int cpu, struct device *device, |
414 | bool refresh_fw) | ||
327 | { | 415 | { |
328 | char fw_name[36] = "amd-ucode/microcode_amd.bin"; | 416 | char fw_name[36] = "amd-ucode/microcode_amd.bin"; |
329 | const struct firmware *fw; | ||
330 | enum ucode_state ret = UCODE_NFOUND; | ||
331 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 417 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
418 | enum ucode_state ret = UCODE_NFOUND; | ||
419 | const struct firmware *fw; | ||
420 | |||
421 | /* reload ucode container only on the boot cpu */ | ||
422 | if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) | ||
423 | return UCODE_OK; | ||
332 | 424 | ||
333 | if (c->x86 >= 0x15) | 425 | if (c->x86 >= 0x15) |
334 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); | 426 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); |
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) | |||
344 | goto fw_release; | 436 | goto fw_release; |
345 | } | 437 | } |
346 | 438 | ||
347 | ret = generic_load_microcode(cpu, fw->data, fw->size); | 439 | /* free old equiv table */ |
440 | free_equiv_cpu_table(); | ||
441 | |||
442 | ret = load_microcode_amd(cpu, fw->data, fw->size); | ||
443 | if (ret != UCODE_OK) | ||
444 | cleanup(); | ||
348 | 445 | ||
349 | fw_release: | 446 | fw_release: |
350 | release_firmware(fw); | 447 | release_firmware(fw); |
351 | 448 | ||
352 | out: | 449 | out: |
353 | return ret; | 450 | return ret; |
354 | } | 451 | } |
355 | 452 | ||
@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
383 | return NULL; | 480 | return NULL; |
384 | } | 481 | } |
385 | 482 | ||
386 | patch = (void *)get_zeroed_page(GFP_KERNEL); | ||
387 | if (!patch) | ||
388 | return NULL; | ||
389 | |||
390 | return &microcode_amd_ops; | 483 | return &microcode_amd_ops;
391 | } | 484 | } |
392 | 485 | ||
393 | void __exit exit_amd_microcode(void) | 486 | void __exit exit_amd_microcode(void) |
394 | { | 487 | { |
395 | free_page((unsigned long)patch); | 488 | cleanup(); |
396 | } | 489 | } |
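The driver drops the single page-sized patch buffer in favour of a small list cache (pcache) holding at most one patch per equivalence ID; update_cache() replaces an entry only when the incoming patch_id is newer, and verify_and_add_patch() derives the CPU family from the CPUID signature as the base family (bits 8-11) plus the extended family (bits 20-27). A self-contained userspace sketch of the replace-if-newer cache, with toy types standing in for struct ucode_patch and the kernel list API (names and values here are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_patch {
    struct toy_patch *next;
    uint32_t patch_id;
    uint16_t equiv_cpu;
};

static struct toy_patch *cache;

static void toy_update_cache(struct toy_patch *new_patch)
{
    struct toy_patch **pp;

    for (pp = &cache; *pp; pp = &(*pp)->next) {
        struct toy_patch *p = *pp;

        if (p->equiv_cpu != new_patch->equiv_cpu)
            continue;
        if (p->patch_id >= new_patch->patch_id)
            return;                  /* already have the newest one for this ID */
        new_patch->next = p->next;   /* replace the stale entry in place */
        *pp = new_patch;
        free(p);
        return;
    }
    new_patch->next = NULL;          /* no patch cached for this ID yet: append */
    *pp = new_patch;
}

static struct toy_patch *make_patch(uint16_t equiv_cpu, uint32_t patch_id)
{
    struct toy_patch *p = calloc(1, sizeof(*p));

    p->equiv_cpu = equiv_cpu;
    p->patch_id = patch_id;
    return p;
}

int main(void)
{
    toy_update_cache(make_patch(0x5010, 0x0500000b));
    toy_update_cache(make_patch(0x5010, 0x0500000c)); /* newer: replaces the first */
    printf("cached patch_id = %#x\n", (unsigned int)cache->patch_id);
    return 0;
}

The kernel version gives the same guarantee but does it with list_replace()/list_add_tail() on the shared pcache list, as shown in the hunk above.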
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9e5bcf1e2376..3a04b224d0c0 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -279,19 +279,18 @@ static struct platform_device *microcode_pdev; | |||
279 | static int reload_for_cpu(int cpu) | 279 | static int reload_for_cpu(int cpu) |
280 | { | 280 | { |
281 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 281 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
282 | enum ucode_state ustate; | ||
282 | int err = 0; | 283 | int err = 0; |
283 | 284 | ||
284 | if (uci->valid) { | 285 | if (!uci->valid) |
285 | enum ucode_state ustate; | 286 | return err; |
286 | |||
287 | ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); | ||
288 | if (ustate == UCODE_OK) | ||
289 | apply_microcode_on_target(cpu); | ||
290 | else | ||
291 | if (ustate == UCODE_ERROR) | ||
292 | err = -EINVAL; | ||
293 | } | ||
294 | 287 | ||
288 | ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); | ||
289 | if (ustate == UCODE_OK) | ||
290 | apply_microcode_on_target(cpu); | ||
291 | else | ||
292 | if (ustate == UCODE_ERROR) | ||
293 | err = -EINVAL; | ||
295 | return err; | 294 | return err; |
296 | } | 295 | } |
297 | 296 | ||
@@ -373,18 +372,15 @@ static void microcode_fini_cpu(int cpu) | |||
373 | 372 | ||
374 | static enum ucode_state microcode_resume_cpu(int cpu) | 373 | static enum ucode_state microcode_resume_cpu(int cpu) |
375 | { | 374 | { |
376 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
377 | |||
378 | if (!uci->mc) | ||
379 | return UCODE_NFOUND; | ||
380 | |||
381 | pr_debug("CPU%d updated upon resume\n", cpu); | 375 | pr_debug("CPU%d updated upon resume\n", cpu); |
382 | apply_microcode_on_target(cpu); | 376 | |
377 | if (apply_microcode_on_target(cpu)) | ||
378 | return UCODE_ERROR; | ||
383 | 379 | ||
384 | return UCODE_OK; | 380 | return UCODE_OK; |
385 | } | 381 | } |
386 | 382 | ||
387 | static enum ucode_state microcode_init_cpu(int cpu) | 383 | static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) |
388 | { | 384 | { |
389 | enum ucode_state ustate; | 385 | enum ucode_state ustate; |
390 | 386 | ||
@@ -395,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu) | |||
395 | if (system_state != SYSTEM_RUNNING) | 391 | if (system_state != SYSTEM_RUNNING) |
396 | return UCODE_NFOUND; | 392 | return UCODE_NFOUND; |
397 | 393 | ||
398 | ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); | 394 | ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
395 | refresh_fw); | ||
399 | 396 | ||
400 | if (ustate == UCODE_OK) { | 397 | if (ustate == UCODE_OK) { |
401 | pr_debug("CPU%d updated upon init\n", cpu); | 398 | pr_debug("CPU%d updated upon init\n", cpu); |
@@ -408,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu) | |||
408 | static enum ucode_state microcode_update_cpu(int cpu) | 405 | static enum ucode_state microcode_update_cpu(int cpu) |
409 | { | 406 | { |
410 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 407 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
411 | enum ucode_state ustate; | ||
412 | 408 | ||
413 | if (uci->valid) | 409 | if (uci->valid) |
414 | ustate = microcode_resume_cpu(cpu); | 410 | return microcode_resume_cpu(cpu); |
415 | else | ||
416 | ustate = microcode_init_cpu(cpu); | ||
417 | 411 | ||
418 | return ustate; | 412 | return microcode_init_cpu(cpu, false); |
419 | } | 413 | } |
420 | 414 | ||
421 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) | 415 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) |
@@ -431,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) | |||
431 | if (err) | 425 | if (err) |
432 | return err; | 426 | return err; |
433 | 427 | ||
434 | if (microcode_init_cpu(cpu) == UCODE_ERROR) | 428 | if (microcode_init_cpu(cpu, true) == UCODE_ERROR) |
435 | return -EINVAL; | 429 | return -EINVAL; |
436 | 430 | ||
437 | return err; | 431 | return err; |
@@ -480,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
480 | struct device *dev; | 474 | struct device *dev; |
481 | 475 | ||
482 | dev = get_cpu_device(cpu); | 476 | dev = get_cpu_device(cpu); |
483 | switch (action) { | 477 | |
478 | switch (action & ~CPU_TASKS_FROZEN) { | ||
484 | case CPU_ONLINE: | 479 | case CPU_ONLINE: |
485 | case CPU_ONLINE_FROZEN: | ||
486 | microcode_update_cpu(cpu); | 480 | microcode_update_cpu(cpu); |
487 | case CPU_DOWN_FAILED: | ||
488 | case CPU_DOWN_FAILED_FROZEN: | ||
489 | pr_debug("CPU%d added\n", cpu); | 481 | pr_debug("CPU%d added\n", cpu); |
482 | /* | ||
483 | * "break" is missing on purpose here because we want to fall | ||
484 | * through in order to create the sysfs group. | ||
485 | */ | ||
486 | |||
487 | case CPU_DOWN_FAILED: | ||
490 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) | 488 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) |
491 | pr_err("Failed to create group for CPU%d\n", cpu); | 489 | pr_err("Failed to create group for CPU%d\n", cpu); |
492 | break; | 490 | break; |
491 | |||
493 | case CPU_DOWN_PREPARE: | 492 | case CPU_DOWN_PREPARE: |
494 | case CPU_DOWN_PREPARE_FROZEN: | ||
495 | /* Suspend is in progress, only remove the interface */ | 493 | /* Suspend is in progress, only remove the interface */ |
496 | sysfs_remove_group(&dev->kobj, &mc_attr_group); | 494 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
497 | pr_debug("CPU%d removed\n", cpu); | 495 | pr_debug("CPU%d removed\n", cpu); |
498 | break; | 496 | break; |
499 | 497 | ||
500 | /* | 498 | /* |
499 | * case CPU_DEAD: | ||
500 | * | ||
501 | * When a CPU goes offline, don't free up or invalidate the copy of | 501 | * When a CPU goes offline, don't free up or invalidate the copy of |
502 | * the microcode in kernel memory, so that we can reuse it when the | 502 | * the microcode in kernel memory, so that we can reuse it when the |
503 | * CPU comes back online without unnecessarily requesting the userspace | 503 | * CPU comes back online without unnecessarily requesting the userspace |
504 | * for it again. | 504 | * for it again. |
505 | */ | 505 | */ |
506 | case CPU_UP_CANCELED_FROZEN: | ||
507 | /* The CPU refused to come up during a system resume */ | ||
508 | microcode_fini_cpu(cpu); | ||
509 | break; | ||
510 | } | 506 | } |
507 | |||
508 | /* The CPU refused to come up during a system resume */ | ||
509 | if (action == CPU_UP_CANCELED_FROZEN) | ||
510 | microcode_fini_cpu(cpu); | ||
511 | |||
511 | return NOTIFY_OK; | 512 | return NOTIFY_OK; |
512 | } | 513 | } |
513 | 514 | ||
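The hotplug notifier in microcode_core.c now switches on action & ~CPU_TASKS_FROZEN, so a single case label covers both the normal and the _FROZEN (suspend/resume) variant of each event instead of listing them twice. A tiny sketch of the masking pattern; the numeric values are assumptions modelled on the kernel's notifier constants, and only the mask-then-switch idea matters:

#include <stdio.h>

#define CPU_ONLINE        0x0002
#define CPU_DOWN_PREPARE  0x0005
#define CPU_TASKS_FROZEN  0x0010
#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)

static const char *classify(unsigned long action)
{
    switch (action & ~CPU_TASKS_FROZEN) { /* one case handles both variants */
    case CPU_ONLINE:
        return "online";
    case CPU_DOWN_PREPARE:
        return "down-prepare";
    default:
        return "other";
    }
}

int main(void)
{
    printf("%s\n", classify(CPU_ONLINE));        /* online */
    printf("%s\n", classify(CPU_ONLINE_FROZEN)); /* online -- same case label */
    return 0;
}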
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0327e2b3c408..3544aed39338 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n) | |||
405 | return 0; | 405 | return 0; |
406 | } | 406 | } |
407 | 407 | ||
408 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) | 408 | static enum ucode_state request_microcode_fw(int cpu, struct device *device, |
409 | bool refresh_fw) | ||
409 | { | 410 | { |
410 | char name[30]; | 411 | char name[30]; |
411 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 412 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f043..a7c5661f8496 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -257,12 +257,14 @@ static int __init msr_init(void) | |||
257 | goto out_chrdev; | 257 | goto out_chrdev; |
258 | } | 258 | } |
259 | msr_class->devnode = msr_devnode; | 259 | msr_class->devnode = msr_devnode; |
260 | get_online_cpus(); | ||
260 | for_each_online_cpu(i) { | 261 | for_each_online_cpu(i) { |
261 | err = msr_device_create(i); | 262 | err = msr_device_create(i); |
262 | if (err != 0) | 263 | if (err != 0) |
263 | goto out_class; | 264 | goto out_class; |
264 | } | 265 | } |
265 | register_hotcpu_notifier(&msr_class_cpu_notifier); | 266 | register_hotcpu_notifier(&msr_class_cpu_notifier); |
267 | put_online_cpus(); | ||
266 | 268 | ||
267 | err = 0; | 269 | err = 0; |
268 | goto out; | 270 | goto out; |
@@ -271,6 +273,7 @@ out_class: | |||
271 | i = 0; | 273 | i = 0; |
272 | for_each_online_cpu(i) | 274 | for_each_online_cpu(i) |
273 | msr_device_destroy(i); | 275 | msr_device_destroy(i); |
276 | put_online_cpus(); | ||
274 | class_destroy(msr_class); | 277 | class_destroy(msr_class); |
275 | out_chrdev: | 278 | out_chrdev: |
276 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); | 279 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
@@ -281,11 +284,13 @@ out: | |||
281 | static void __exit msr_exit(void) | 284 | static void __exit msr_exit(void) |
282 | { | 285 | { |
283 | int cpu = 0; | 286 | int cpu = 0; |
287 | get_online_cpus(); | ||
284 | for_each_online_cpu(cpu) | 288 | for_each_online_cpu(cpu) |
285 | msr_device_destroy(cpu); | 289 | msr_device_destroy(cpu); |
286 | class_destroy(msr_class); | 290 | class_destroy(msr_class); |
287 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); | 291 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
288 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); | 292 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); |
293 | put_online_cpus(); | ||
289 | } | 294 | } |
290 | 295 | ||
291 | module_init(msr_init); | 296 | module_init(msr_init); |
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..e309cc5c276e --- /dev/null +++ b/arch/x86/kernel/perf_regs.c | |||
@@ -0,0 +1,105 @@ | |||
1 | #include <linux/errno.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include <linux/bug.h> | ||
6 | #include <linux/stddef.h> | ||
7 | #include <asm/perf_regs.h> | ||
8 | #include <asm/ptrace.h> | ||
9 | |||
10 | #ifdef CONFIG_X86_32 | ||
11 | #define PERF_REG_X86_MAX PERF_REG_X86_32_MAX | ||
12 | #else | ||
13 | #define PERF_REG_X86_MAX PERF_REG_X86_64_MAX | ||
14 | #endif | ||
15 | |||
16 | #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) | ||
17 | |||
18 | static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { | ||
19 | PT_REGS_OFFSET(PERF_REG_X86_AX, ax), | ||
20 | PT_REGS_OFFSET(PERF_REG_X86_BX, bx), | ||
21 | PT_REGS_OFFSET(PERF_REG_X86_CX, cx), | ||
22 | PT_REGS_OFFSET(PERF_REG_X86_DX, dx), | ||
23 | PT_REGS_OFFSET(PERF_REG_X86_SI, si), | ||
24 | PT_REGS_OFFSET(PERF_REG_X86_DI, di), | ||
25 | PT_REGS_OFFSET(PERF_REG_X86_BP, bp), | ||
26 | PT_REGS_OFFSET(PERF_REG_X86_SP, sp), | ||
27 | PT_REGS_OFFSET(PERF_REG_X86_IP, ip), | ||
28 | PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), | ||
29 | PT_REGS_OFFSET(PERF_REG_X86_CS, cs), | ||
30 | PT_REGS_OFFSET(PERF_REG_X86_SS, ss), | ||
31 | #ifdef CONFIG_X86_32 | ||
32 | PT_REGS_OFFSET(PERF_REG_X86_DS, ds), | ||
33 | PT_REGS_OFFSET(PERF_REG_X86_ES, es), | ||
34 | PT_REGS_OFFSET(PERF_REG_X86_FS, fs), | ||
35 | PT_REGS_OFFSET(PERF_REG_X86_GS, gs), | ||
36 | #else | ||
37 | /* | ||
38 | * The pt_regs struct does not store | ||
39 | * ds, es, fs, gs in 64 bit mode. | ||
40 | */ | ||
41 | (unsigned int) -1, | ||
42 | (unsigned int) -1, | ||
43 | (unsigned int) -1, | ||
44 | (unsigned int) -1, | ||
45 | #endif | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | PT_REGS_OFFSET(PERF_REG_X86_R8, r8), | ||
48 | PT_REGS_OFFSET(PERF_REG_X86_R9, r9), | ||
49 | PT_REGS_OFFSET(PERF_REG_X86_R10, r10), | ||
50 | PT_REGS_OFFSET(PERF_REG_X86_R11, r11), | ||
51 | PT_REGS_OFFSET(PERF_REG_X86_R12, r12), | ||
52 | PT_REGS_OFFSET(PERF_REG_X86_R13, r13), | ||
53 | PT_REGS_OFFSET(PERF_REG_X86_R14, r14), | ||
54 | PT_REGS_OFFSET(PERF_REG_X86_R15, r15), | ||
55 | #endif | ||
56 | }; | ||
57 | |||
58 | u64 perf_reg_value(struct pt_regs *regs, int idx) | ||
59 | { | ||
60 | if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) | ||
61 | return 0; | ||
62 | |||
63 | return regs_get_register(regs, pt_regs_offset[idx]); | ||
64 | } | ||
65 | |||
66 | #define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) | ||
67 | |||
68 | #ifdef CONFIG_X86_32 | ||
69 | int perf_reg_validate(u64 mask) | ||
70 | { | ||
71 | if (!mask || mask & REG_RESERVED) | ||
72 | return -EINVAL; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | u64 perf_reg_abi(struct task_struct *task) | ||
78 | { | ||
79 | return PERF_SAMPLE_REGS_ABI_32; | ||
80 | } | ||
81 | #else /* CONFIG_X86_64 */ | ||
82 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ | ||
83 | (1ULL << PERF_REG_X86_ES) | \ | ||
84 | (1ULL << PERF_REG_X86_FS) | \ | ||
85 | (1ULL << PERF_REG_X86_GS)) | ||
86 | |||
87 | int perf_reg_validate(u64 mask) | ||
88 | { | ||
89 | if (!mask || mask & REG_RESERVED) | ||
90 | return -EINVAL; | ||
91 | |||
92 | if (mask & REG_NOSUPPORT) | ||
93 | return -EINVAL; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | u64 perf_reg_abi(struct task_struct *task) | ||
99 | { | ||
100 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
101 | return PERF_SAMPLE_REGS_ABI_32; | ||
102 | else | ||
103 | return PERF_SAMPLE_REGS_ABI_64; | ||
104 | } | ||
105 | #endif /* CONFIG_X86_32 */ | ||
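perf_regs.c maps a perf register index to a byte offset inside struct pt_regs with a designated-initializer offsetof() table, and perf_reg_value() then reads the saved register at that offset. A self-contained userspace sketch of the same table-plus-lookup technique over a toy register structure (every name below is illustrative, not a kernel symbol):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_regs {
    uint64_t ax, bx, cx, dx, si, di, bp, sp, ip;
};

enum { TOY_AX, TOY_BX, TOY_CX, TOY_DX, TOY_SI, TOY_DI, TOY_BP, TOY_SP, TOY_IP, TOY_MAX };

#define TOY_REGS_OFFSET(id, r) [id] = offsetof(struct toy_regs, r)

static const size_t toy_regs_offset[TOY_MAX] = {
    TOY_REGS_OFFSET(TOY_AX, ax),
    TOY_REGS_OFFSET(TOY_BX, bx),
    TOY_REGS_OFFSET(TOY_CX, cx),
    TOY_REGS_OFFSET(TOY_DX, dx),
    TOY_REGS_OFFSET(TOY_SI, si),
    TOY_REGS_OFFSET(TOY_DI, di),
    TOY_REGS_OFFSET(TOY_BP, bp),
    TOY_REGS_OFFSET(TOY_SP, sp),
    TOY_REGS_OFFSET(TOY_IP, ip),
};

/* analogue of perf_reg_value(): fetch a register by index via its offset */
static uint64_t toy_reg_value(const struct toy_regs *regs, int idx)
{
    uint64_t val;

    if (idx < 0 || idx >= TOY_MAX)
        return 0;
    memcpy(&val, (const char *)regs + toy_regs_offset[idx], sizeof(val));
    return val;
}

int main(void)
{
    struct toy_regs regs = { .ax = 42, .ip = 0xdeadbeef };

    printf("ax = %llu\n", (unsigned long long)toy_reg_value(&regs, TOY_AX));
    printf("ip = %#llx\n", (unsigned long long)toy_reg_value(&regs, TOY_IP));
    return 0;
}

The kernel file additionally rejects request masks with bits at or above PERF_REG_X86_MAX via REG_RESERVED, and the segment registers on 64-bit via REG_NOSUPPORT, before any lookup happens.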
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2069e3..d5f15c3f7b25 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c | |||
@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) | |||
150 | return oprom; | 150 | return oprom; |
151 | } | 151 | } |
152 | 152 | ||
153 | void *pci_map_biosrom(struct pci_dev *pdev) | 153 | void __iomem *pci_map_biosrom(struct pci_dev *pdev) |
154 | { | 154 | { |
155 | struct resource *oprom = find_oprom(pdev); | 155 | struct resource *oprom = find_oprom(pdev); |
156 | 156 | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f719..dc3567e083f9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
66 | { | 66 | { |
67 | int ret; | 67 | int ret; |
68 | 68 | ||
69 | unlazy_fpu(src); | ||
70 | |||
71 | *dst = *src; | 69 | *dst = *src; |
72 | if (fpu_allocated(&src->thread.fpu)) { | 70 | if (fpu_allocated(&src->thread.fpu)) { |
73 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); | 71 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
74 | ret = fpu_alloc(&dst->thread.fpu); | 72 | ret = fpu_alloc(&dst->thread.fpu); |
75 | if (ret) | 73 | if (ret) |
76 | return ret; | 74 | return ret; |
77 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); | 75 | fpu_copy(dst, src); |
78 | } | 76 | } |
79 | return 0; | 77 | return 0; |
80 | } | 78 | } |
@@ -97,16 +95,6 @@ void arch_task_cache_init(void) | |||
97 | SLAB_PANIC | SLAB_NOTRACK, NULL); | 95 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
98 | } | 96 | } |
99 | 97 | ||
100 | static inline void drop_fpu(struct task_struct *tsk) | ||
101 | { | ||
102 | /* | ||
103 | * Forget coprocessor state.. | ||
104 | */ | ||
105 | tsk->fpu_counter = 0; | ||
106 | clear_fpu(tsk); | ||
107 | clear_used_math(); | ||
108 | } | ||
109 | |||
110 | /* | 98 | /* |
111 | * Free current thread data structures etc.. | 99 | * Free current thread data structures etc.. |
112 | */ | 100 | */ |
@@ -163,7 +151,13 @@ void flush_thread(void) | |||
163 | 151 | ||
164 | flush_ptrace_hw_breakpoint(tsk); | 152 | flush_ptrace_hw_breakpoint(tsk); |
165 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 153 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
166 | drop_fpu(tsk); | 154 | drop_init_fpu(tsk); |
155 | /* | ||
156 | * Free the FPU state for non xsave platforms. They get reallocated | ||
157 | * lazily at the first use. | ||
158 | */ | ||
159 | if (!use_eager_fpu()) | ||
160 | free_thread_xstate(tsk); | ||
167 | } | 161 | } |
168 | 162 | ||
169 | static void hard_disable_TSC(void) | 163 | static void hard_disable_TSC(void) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121b..b9ff83c7135b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
190 | regs->cs = __USER_CS; | 190 | regs->cs = __USER_CS; |
191 | regs->ip = new_ip; | 191 | regs->ip = new_ip; |
192 | regs->sp = new_sp; | 192 | regs->sp = new_sp; |
193 | /* | ||
194 | * Free the old FP and other extended state | ||
195 | */ | ||
196 | free_thread_xstate(current); | ||
197 | } | 193 | } |
198 | EXPORT_SYMBOL_GPL(start_thread); | 194 | EXPORT_SYMBOL_GPL(start_thread); |
199 | 195 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9d7cb8..8a6d20ce1978 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, | |||
232 | regs->cs = _cs; | 232 | regs->cs = _cs; |
233 | regs->ss = _ss; | 233 | regs->ss = _ss; |
234 | regs->flags = X86_EFLAGS_IF; | 234 | regs->flags = X86_EFLAGS_IF; |
235 | /* | ||
236 | * Free the old FP and other extended state | ||
237 | */ | ||
238 | free_thread_xstate(current); | ||
239 | } | 235 | } |
240 | 236 | ||
241 | void | 237 | void |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0f..b00b33a18390 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/signal.h> | 21 | #include <linux/signal.h> |
22 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
23 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
24 | #include <linux/rcupdate.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { | |||
1332 | #define genregs32_get genregs_get | 1333 | #define genregs32_get genregs_get |
1333 | #define genregs32_set genregs_set | 1334 | #define genregs32_set genregs_set |
1334 | 1335 | ||
1335 | #define user_i387_ia32_struct user_i387_struct | ||
1336 | #define user32_fxsr_struct user_fxsr_struct | ||
1337 | |||
1338 | #endif /* CONFIG_X86_64 */ | 1336 | #endif /* CONFIG_X86_64 */ |
1339 | 1337 | ||
1340 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1338 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1463 | { | 1461 | { |
1464 | long ret = 0; | 1462 | long ret = 0; |
1465 | 1463 | ||
1464 | rcu_user_exit(); | ||
1465 | |||
1466 | /* | 1466 | /* |
1467 | * If we stepped into a sysenter/syscall insn, it trapped in | 1467 | * If we stepped into a sysenter/syscall insn, it trapped in |
1468 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | 1468 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. |
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1526 | !test_thread_flag(TIF_SYSCALL_EMU); | 1526 | !test_thread_flag(TIF_SYSCALL_EMU); |
1527 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) | 1527 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) |
1528 | tracehook_report_syscall_exit(regs, step); | 1528 | tracehook_report_syscall_exit(regs, step); |
1529 | |||
1530 | rcu_user_enter(); | ||
1529 | } | 1531 | } |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b95..4f165479c453 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -961,9 +961,7 @@ void __init setup_arch(char **cmdline_p) | |||
961 | kvmclock_init(); | 961 | kvmclock_init(); |
962 | #endif | 962 | #endif |
963 | 963 | ||
964 | x86_init.paging.pagetable_setup_start(swapper_pg_dir); | 964 | x86_init.paging.pagetable_init(); |
965 | paging_init(); | ||
966 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); | ||
967 | 965 | ||
968 | if (boot_cpu_data.cpuid_level >= 0) { | 966 | if (boot_cpu_data.cpuid_level >= 0) { |
969 | /* A CPU has %cr4 if and only if it has CPUID */ | 967 | /* A CPU has %cr4 if and only if it has CPUID */ |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376e..b33144c8b309 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
114 | regs->orig_ax = -1; /* disable syscall checks */ | 114 | regs->orig_ax = -1; /* disable syscall checks */ |
115 | 115 | ||
116 | get_user_ex(buf, &sc->fpstate); | 116 | get_user_ex(buf, &sc->fpstate); |
117 | err |= restore_i387_xstate(buf); | ||
118 | 117 | ||
119 | get_user_ex(*pax, &sc->ax); | 118 | get_user_ex(*pax, &sc->ax); |
120 | } get_user_catch(err); | 119 | } get_user_catch(err); |
121 | 120 | ||
121 | err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); | ||
122 | |||
122 | return err; | 123 | return err; |
123 | } | 124 | } |
124 | 125 | ||
@@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
206 | void __user **fpstate) | 207 | void __user **fpstate) |
207 | { | 208 | { |
208 | /* Default to using normal stack */ | 209 | /* Default to using normal stack */ |
210 | unsigned long math_size = 0; | ||
209 | unsigned long sp = regs->sp; | 211 | unsigned long sp = regs->sp; |
212 | unsigned long buf_fx = 0; | ||
210 | int onsigstack = on_sig_stack(sp); | 213 | int onsigstack = on_sig_stack(sp); |
211 | 214 | ||
212 | #ifdef CONFIG_X86_64 | ||
213 | /* redzone */ | 215 | /* redzone */ |
214 | sp -= 128; | 216 | if (config_enabled(CONFIG_X86_64)) |
215 | #endif /* CONFIG_X86_64 */ | 217 | sp -= 128; |
216 | 218 | ||
217 | if (!onsigstack) { | 219 | if (!onsigstack) { |
218 | /* This is the X/Open sanctioned signal stack switching. */ | 220 | /* This is the X/Open sanctioned signal stack switching. */ |
219 | if (ka->sa.sa_flags & SA_ONSTACK) { | 221 | if (ka->sa.sa_flags & SA_ONSTACK) { |
220 | if (current->sas_ss_size) | 222 | if (current->sas_ss_size) |
221 | sp = current->sas_ss_sp + current->sas_ss_size; | 223 | sp = current->sas_ss_sp + current->sas_ss_size; |
222 | } else { | 224 | } else if (config_enabled(CONFIG_X86_32) && |
223 | #ifdef CONFIG_X86_32 | 225 | (regs->ss & 0xffff) != __USER_DS && |
224 | /* This is the legacy signal stack switching. */ | 226 | !(ka->sa.sa_flags & SA_RESTORER) && |
225 | if ((regs->ss & 0xffff) != __USER_DS && | 227 | ka->sa.sa_restorer) { |
226 | !(ka->sa.sa_flags & SA_RESTORER) && | 228 | /* This is the legacy signal stack switching. */ |
227 | ka->sa.sa_restorer) | ||
228 | sp = (unsigned long) ka->sa.sa_restorer; | 229 | sp = (unsigned long) ka->sa.sa_restorer; |
229 | #endif /* CONFIG_X86_32 */ | ||
230 | } | 230 | } |
231 | } | 231 | } |
232 | 232 | ||
233 | if (used_math()) { | 233 | if (used_math()) { |
234 | sp -= sig_xstate_size; | 234 | sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), |
235 | #ifdef CONFIG_X86_64 | 235 | &buf_fx, &math_size); |
236 | sp = round_down(sp, 64); | ||
237 | #endif /* CONFIG_X86_64 */ | ||
238 | *fpstate = (void __user *)sp; | 236 | *fpstate = (void __user *)sp; |
239 | } | 237 | } |
240 | 238 | ||
@@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
247 | if (onsigstack && !likely(on_sig_stack(sp))) | 245 | if (onsigstack && !likely(on_sig_stack(sp))) |
248 | return (void __user *)-1L; | 246 | return (void __user *)-1L; |
249 | 247 | ||
250 | /* save i387 state */ | 248 | /* save i387 and extended state */ |
251 | if (used_math() && save_i387_xstate(*fpstate) < 0) | 249 | if (used_math() && |
250 | save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) | ||
252 | return (void __user *)-1L; | 251 | return (void __user *)-1L; |
253 | 252 | ||
254 | return (void __user *)sp; | 253 | return (void __user *)sp; |
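The get_sigframe() hunk above swaps #ifdef blocks for config_enabled() checks: both branches are always parsed and type-checked, and the compiler folds the constant condition so the untaken one (for example the 128-byte red-zone skip, which applies only on 64-bit) disappears from the object code. A minimal sketch of the pattern; the config_enabled() stand-in here is a plain constant macro rather than the kernel's Kconfig-aware version, so treat it as an assumption:

#include <stdio.h>

#define CONFIG_TOY_64BIT 1
#define config_enabled(opt) (opt) /* simplified stand-in for the kernel macro */

static unsigned long toy_sigframe_sp(unsigned long sp)
{
    if (config_enabled(CONFIG_TOY_64BIT))
        sp -= 128; /* skip the ABI red zone, as get_sigframe() does on 64-bit */
    return sp;
}

int main(void)
{
    printf("sp = %#lx\n", toy_sigframe_sp(0x7fff0000UL));
    return 0;
}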
@@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
357 | put_user_ex(sig, &frame->sig); | 356 | put_user_ex(sig, &frame->sig); |
358 | put_user_ex(&frame->info, &frame->pinfo); | 357 | put_user_ex(&frame->info, &frame->pinfo); |
359 | put_user_ex(&frame->uc, &frame->puc); | 358 | put_user_ex(&frame->uc, &frame->puc); |
360 | err |= copy_siginfo_to_user(&frame->info, info); | ||
361 | 359 | ||
362 | /* Create the ucontext. */ | 360 | /* Create the ucontext. */ |
363 | if (cpu_has_xsave) | 361 | if (cpu_has_xsave) |
@@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
369 | put_user_ex(sas_ss_flags(regs->sp), | 367 | put_user_ex(sas_ss_flags(regs->sp), |
370 | &frame->uc.uc_stack.ss_flags); | 368 | &frame->uc.uc_stack.ss_flags); |
371 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | 369 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); |
372 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
373 | regs, set->sig[0]); | ||
374 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
375 | 370 | ||
376 | /* Set up to return from userspace. */ | 371 | /* Set up to return from userspace. */ |
377 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 372 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
@@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
388 | */ | 383 | */ |
389 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); | 384 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
390 | } put_user_catch(err); | 385 | } put_user_catch(err); |
386 | |||
387 | err |= copy_siginfo_to_user(&frame->info, info); | ||
388 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
389 | regs, set->sig[0]); | ||
390 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
391 | 391 | ||
392 | if (err) | 392 | if (err) |
393 | return -EFAULT; | 393 | return -EFAULT; |
@@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
436 | put_user_ex(sas_ss_flags(regs->sp), | 436 | put_user_ex(sas_ss_flags(regs->sp), |
437 | &frame->uc.uc_stack.ss_flags); | 437 | &frame->uc.uc_stack.ss_flags); |
438 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 438 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
439 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
440 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
441 | 439 | ||
442 | /* Set up to return from userspace. If provided, use a stub | 440 | /* Set up to return from userspace. If provided, use a stub |
443 | already in userspace. */ | 441 | already in userspace. */ |
@@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
450 | } | 448 | } |
451 | } put_user_catch(err); | 449 | } put_user_catch(err); |
452 | 450 | ||
451 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
452 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
453 | |||
453 | if (err) | 454 | if (err) |
454 | return -EFAULT; | 455 | return -EFAULT; |
455 | 456 | ||
@@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
474 | } | 475 | } |
475 | #endif /* CONFIG_X86_32 */ | 476 | #endif /* CONFIG_X86_32 */ |
476 | 477 | ||
478 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
479 | siginfo_t *info, compat_sigset_t *set, | ||
480 | struct pt_regs *regs) | ||
481 | { | ||
482 | #ifdef CONFIG_X86_X32_ABI | ||
483 | struct rt_sigframe_x32 __user *frame; | ||
484 | void __user *restorer; | ||
485 | int err = 0; | ||
486 | void __user *fpstate = NULL; | ||
487 | |||
488 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | ||
489 | |||
490 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
491 | return -EFAULT; | ||
492 | |||
493 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
494 | if (copy_siginfo_to_user32(&frame->info, info)) | ||
495 | return -EFAULT; | ||
496 | } | ||
497 | |||
498 | put_user_try { | ||
499 | /* Create the ucontext. */ | ||
500 | if (cpu_has_xsave) | ||
501 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
502 | else | ||
503 | put_user_ex(0, &frame->uc.uc_flags); | ||
504 | put_user_ex(0, &frame->uc.uc_link); | ||
505 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
506 | put_user_ex(sas_ss_flags(regs->sp), | ||
507 | &frame->uc.uc_stack.ss_flags); | ||
508 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
509 | put_user_ex(0, &frame->uc.uc__pad0); | ||
510 | |||
511 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
512 | restorer = ka->sa.sa_restorer; | ||
513 | } else { | ||
514 | /* could use a vstub here */ | ||
515 | restorer = NULL; | ||
516 | err |= -EFAULT; | ||
517 | } | ||
518 | put_user_ex(restorer, &frame->pretcode); | ||
519 | } put_user_catch(err); | ||
520 | |||
521 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
522 | regs, set->sig[0]); | ||
523 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
524 | |||
525 | if (err) | ||
526 | return -EFAULT; | ||
527 | |||
528 | /* Set up registers for signal handler */ | ||
529 | regs->sp = (unsigned long) frame; | ||
530 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
531 | |||
532 | /* We use the x32 calling convention here... */ | ||
533 | regs->di = sig; | ||
534 | regs->si = (unsigned long) &frame->info; | ||
535 | regs->dx = (unsigned long) &frame->uc; | ||
536 | |||
537 | loadsegment(ds, __USER_DS); | ||
538 | loadsegment(es, __USER_DS); | ||
539 | |||
540 | regs->cs = __USER_CS; | ||
541 | regs->ss = __USER_DS; | ||
542 | #endif /* CONFIG_X86_X32_ABI */ | ||
543 | |||
544 | return 0; | ||
545 | } | ||
546 | |||
477 | #ifdef CONFIG_X86_32 | 547 | #ifdef CONFIG_X86_32 |
478 | /* | 548 | /* |
479 | * Atomically swap in the new signal mask, and wait for a signal. | 549 | * Atomically swap in the new signal mask, and wait for a signal. |
@@ -612,55 +682,22 @@ static int signr_convert(int sig) | |||
612 | return sig; | 682 | return sig; |
613 | } | 683 | } |
614 | 684 | ||
615 | #ifdef CONFIG_X86_32 | ||
616 | |||
617 | #define is_ia32 1 | ||
618 | #define ia32_setup_frame __setup_frame | ||
619 | #define ia32_setup_rt_frame __setup_rt_frame | ||
620 | |||
621 | #else /* !CONFIG_X86_32 */ | ||
622 | |||
623 | #ifdef CONFIG_IA32_EMULATION | ||
624 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
625 | #else /* !CONFIG_IA32_EMULATION */ | ||
626 | #define is_ia32 0 | ||
627 | #endif /* CONFIG_IA32_EMULATION */ | ||
628 | |||
629 | #ifdef CONFIG_X86_X32_ABI | ||
630 | #define is_x32 test_thread_flag(TIF_X32) | ||
631 | |||
632 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
633 | siginfo_t *info, compat_sigset_t *set, | ||
634 | struct pt_regs *regs); | ||
635 | #else /* !CONFIG_X86_X32_ABI */ | ||
636 | #define is_x32 0 | ||
637 | #endif /* CONFIG_X86_X32_ABI */ | ||
638 | |||
639 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
640 | sigset_t *set, struct pt_regs *regs); | ||
641 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
642 | sigset_t *set, struct pt_regs *regs); | ||
643 | |||
644 | #endif /* CONFIG_X86_32 */ | ||
645 | |||
646 | static int | 685 | static int |
647 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 686 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
648 | struct pt_regs *regs) | 687 | struct pt_regs *regs) |
649 | { | 688 | { |
650 | int usig = signr_convert(sig); | 689 | int usig = signr_convert(sig); |
651 | sigset_t *set = sigmask_to_save(); | 690 | sigset_t *set = sigmask_to_save(); |
691 | compat_sigset_t *cset = (compat_sigset_t *) set; | ||
652 | 692 | ||
653 | /* Set up the stack frame */ | 693 | /* Set up the stack frame */ |
654 | if (is_ia32) { | 694 | if (is_ia32_frame()) { |
655 | if (ka->sa.sa_flags & SA_SIGINFO) | 695 | if (ka->sa.sa_flags & SA_SIGINFO) |
656 | return ia32_setup_rt_frame(usig, ka, info, set, regs); | 696 | return ia32_setup_rt_frame(usig, ka, info, cset, regs); |
657 | else | 697 | else |
658 | return ia32_setup_frame(usig, ka, set, regs); | 698 | return ia32_setup_frame(usig, ka, cset, regs); |
659 | #ifdef CONFIG_X86_X32_ABI | 699 | } else if (is_x32_frame()) { |
660 | } else if (is_x32) { | 700 | return x32_setup_rt_frame(usig, ka, info, cset, regs); |
661 | return x32_setup_rt_frame(usig, ka, info, | ||
662 | (compat_sigset_t *)set, regs); | ||
663 | #endif | ||
664 | } else { | 701 | } else { |
665 | return __setup_rt_frame(sig, ka, info, set, regs); | 702 | return __setup_rt_frame(sig, ka, info, set, regs); |
666 | } | 703 | } |
@@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs) | |||
779 | void | 816 | void |
780 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 817 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
781 | { | 818 | { |
819 | rcu_user_exit(); | ||
820 | |||
782 | #ifdef CONFIG_X86_MCE | 821 | #ifdef CONFIG_X86_MCE |
783 | /* notify userspace of pending MCEs */ | 822 | /* notify userspace of pending MCEs */ |
784 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 823 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
@@ -804,6 +843,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
804 | #ifdef CONFIG_X86_32 | 843 | #ifdef CONFIG_X86_32 |
805 | clear_thread_flag(TIF_IRET); | 844 | clear_thread_flag(TIF_IRET); |
806 | #endif /* CONFIG_X86_32 */ | 845 | #endif /* CONFIG_X86_32 */ |
846 | |||
847 | rcu_user_enter(); | ||
807 | } | 848 | } |
808 | 849 | ||
809 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 850 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
@@ -824,72 +865,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
824 | } | 865 | } |
825 | 866 | ||
826 | #ifdef CONFIG_X86_X32_ABI | 867 | #ifdef CONFIG_X86_X32_ABI |
827 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
828 | siginfo_t *info, compat_sigset_t *set, | ||
829 | struct pt_regs *regs) | ||
830 | { | ||
831 | struct rt_sigframe_x32 __user *frame; | ||
832 | void __user *restorer; | ||
833 | int err = 0; | ||
834 | void __user *fpstate = NULL; | ||
835 | |||
836 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | ||
837 | |||
838 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
839 | return -EFAULT; | ||
840 | |||
841 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
842 | if (copy_siginfo_to_user32(&frame->info, info)) | ||
843 | return -EFAULT; | ||
844 | } | ||
845 | |||
846 | put_user_try { | ||
847 | /* Create the ucontext. */ | ||
848 | if (cpu_has_xsave) | ||
849 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
850 | else | ||
851 | put_user_ex(0, &frame->uc.uc_flags); | ||
852 | put_user_ex(0, &frame->uc.uc_link); | ||
853 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
854 | put_user_ex(sas_ss_flags(regs->sp), | ||
855 | &frame->uc.uc_stack.ss_flags); | ||
856 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
857 | put_user_ex(0, &frame->uc.uc__pad0); | ||
858 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
859 | regs, set->sig[0]); | ||
860 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
861 | |||
862 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
863 | restorer = ka->sa.sa_restorer; | ||
864 | } else { | ||
865 | /* could use a vstub here */ | ||
866 | restorer = NULL; | ||
867 | err |= -EFAULT; | ||
868 | } | ||
869 | put_user_ex(restorer, &frame->pretcode); | ||
870 | } put_user_catch(err); | ||
871 | |||
872 | if (err) | ||
873 | return -EFAULT; | ||
874 | |||
875 | /* Set up registers for signal handler */ | ||
876 | regs->sp = (unsigned long) frame; | ||
877 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
878 | |||
879 | /* We use the x32 calling convention here... */ | ||
880 | regs->di = sig; | ||
881 | regs->si = (unsigned long) &frame->info; | ||
882 | regs->dx = (unsigned long) &frame->uc; | ||
883 | |||
884 | loadsegment(ds, __USER_DS); | ||
885 | loadsegment(es, __USER_DS); | ||
886 | |||
887 | regs->cs = __USER_CS; | ||
888 | regs->ss = __USER_DS; | ||
889 | |||
890 | return 0; | ||
891 | } | ||
892 | |||
893 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | 868 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) |
894 | { | 869 | { |
895 | struct rt_sigframe_x32 __user *frame; | 870 | struct rt_sigframe_x32 __user *frame; |
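The x32 path added above hands control to the handler by loading regs->di, regs->si and regs->dx and then switching cs/ss to the 64-bit user segments. As an illustrative userspace-side sketch (not part of this commit), those three registers are exactly the three arguments a SA_SIGINFO handler receives, since the x32/x86-64 calling convention passes the first integer arguments in rdi, rsi and rdx:

    #define _POSIX_C_SOURCE 200809L
    #include <signal.h>

    /* Sketch only: how the register setup in x32_setup_rt_frame() surfaces
     * in userspace.  The kernel enters the handler with
     *   rdi = sig, rsi = &frame->info, rdx = &frame->uc,
     * which the C calling convention maps onto the three parameters below. */
    static volatile sig_atomic_t got;

    static void demo_handler(int sig, siginfo_t *info, void *ucontext)
    {
            (void)info;        /* &frame->info  */
            (void)ucontext;    /* &frame->uc, the ucontext written into the frame */
            got = sig;
    }

    int main(void)
    {
            struct sigaction sa = { 0 };

            sa.sa_sigaction = demo_handler;
            sa.sa_flags = SA_SIGINFO;
            sigaction(SIGUSR1, &sa, NULL);
            raise(SIGUSR1);
            return got == SIGUSR1 ? 0 : 1;
    }

A handler registered this way with SA_SIGINFO (and, for x32, an explicit SA_RESTORER stub, which the hunk above requires) is entered with exactly the register layout the kernel sets up.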
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c02..c80a33bc528b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
665 | unsigned long boot_error = 0; | 665 | unsigned long boot_error = 0; |
666 | int timeout; | 666 | int timeout; |
667 | 667 | ||
668 | alternatives_smp_switch(1); | 668 | /* Just in case we booted with a single CPU. */ |
669 | alternatives_enable_smp(); | ||
669 | 670 | ||
670 | idle->thread.sp = (unsigned long) (((struct pt_regs *) | 671 | idle->thread.sp = (unsigned long) (((struct pt_regs *) |
671 | (THREAD_SIZE + task_stack_page(idle))) - 1); | 672 | (THREAD_SIZE + task_stack_page(idle))) - 1); |
@@ -1053,20 +1054,6 @@ out: | |||
1053 | preempt_enable(); | 1054 | preempt_enable(); |
1054 | } | 1055 | } |
1055 | 1056 | ||
1056 | void arch_disable_nonboot_cpus_begin(void) | ||
1057 | { | ||
1058 | /* | ||
1059 | * Avoid the smp alternatives switch during the disable_nonboot_cpus(). | ||
1060 | * In the suspend path, we will be back in the SMP mode shortly anyways. | ||
1061 | */ | ||
1062 | skip_smp_alternatives = true; | ||
1063 | } | ||
1064 | |||
1065 | void arch_disable_nonboot_cpus_end(void) | ||
1066 | { | ||
1067 | skip_smp_alternatives = false; | ||
1068 | } | ||
1069 | |||
1070 | void arch_enable_nonboot_cpus_begin(void) | 1057 | void arch_enable_nonboot_cpus_begin(void) |
1071 | { | 1058 | { |
1072 | set_mtrr_aps_delayed_init(); | 1059 | set_mtrr_aps_delayed_init(); |
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) | |||
1256 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1243 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1257 | if (system_state == SYSTEM_RUNNING) | 1244 | if (system_state == SYSTEM_RUNNING) |
1258 | pr_info("CPU %u is now offline\n", cpu); | 1245 | pr_info("CPU %u is now offline\n", cpu); |
1259 | |||
1260 | if (1 == num_online_cpus()) | ||
1261 | alternatives_smp_switch(0); | ||
1262 | return; | 1246 | return; |
1263 | } | 1247 | } |
1264 | msleep(100); | 1248 | msleep(100); |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d1161488..cd3b2438a980 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) | |||
157 | return 1; | 157 | return 1; |
158 | } | 158 | } |
159 | 159 | ||
160 | void set_task_blockstep(struct task_struct *task, bool on) | ||
161 | { | ||
162 | unsigned long debugctl; | ||
163 | |||
164 | /* | ||
165 | * Ensure irq/preemption can't change debugctl in between. | ||
166 | * Note also that both TIF_BLOCKSTEP and debugctl should | ||
167 | * be changed atomically wrt preemption. | ||
168 | * FIXME: this means that set/clear TIF_BLOCKSTEP is simply | ||
169 | * wrong if task != current, SIGKILL can wake up the stopped | ||
170 | * tracee and set/clear can play with the running task, this | ||
171 | * can confuse the next __switch_to_xtra(). | ||
172 | */ | ||
173 | local_irq_disable(); | ||
174 | debugctl = get_debugctlmsr(); | ||
175 | if (on) { | ||
176 | debugctl |= DEBUGCTLMSR_BTF; | ||
177 | set_tsk_thread_flag(task, TIF_BLOCKSTEP); | ||
178 | } else { | ||
179 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
180 | clear_tsk_thread_flag(task, TIF_BLOCKSTEP); | ||
181 | } | ||
182 | if (task == current) | ||
183 | update_debugctlmsr(debugctl); | ||
184 | local_irq_enable(); | ||
185 | } | ||
186 | |||
160 | /* | 187 | /* |
161 | * Enable single or block step. | 188 | * Enable single or block step. |
162 | */ | 189 | */ |
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) | |||
169 | * So no one should try to use debugger block stepping in a program | 196 | * So no one should try to use debugger block stepping in a program |
170 | * that uses user-mode single stepping itself. | 197 | * that uses user-mode single stepping itself. |
171 | */ | 198 | */ |
172 | if (enable_single_step(child) && block) { | 199 | if (enable_single_step(child) && block) |
173 | unsigned long debugctl = get_debugctlmsr(); | 200 | set_task_blockstep(child, true); |
174 | 201 | else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) | |
175 | debugctl |= DEBUGCTLMSR_BTF; | 202 | set_task_blockstep(child, false); |
176 | update_debugctlmsr(debugctl); | ||
177 | set_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
178 | } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { | ||
179 | unsigned long debugctl = get_debugctlmsr(); | ||
180 | |||
181 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
182 | update_debugctlmsr(debugctl); | ||
183 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
184 | } | ||
185 | } | 203 | } |
186 | 204 | ||
187 | void user_enable_single_step(struct task_struct *child) | 205 | void user_enable_single_step(struct task_struct *child) |
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) | |||
199 | /* | 217 | /* |
200 | * Make sure block stepping (BTF) is disabled. | 218 | * Make sure block stepping (BTF) is disabled. |
201 | */ | 219 | */ |
202 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { | 220 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) |
203 | unsigned long debugctl = get_debugctlmsr(); | 221 | set_task_blockstep(child, false); |
204 | |||
205 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
206 | update_debugctlmsr(debugctl); | ||
207 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
208 | } | ||
209 | 222 | ||
210 | /* Always clear TIF_SINGLESTEP... */ | 223 | /* Always clear TIF_SINGLESTEP... */ |
211 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 224 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |
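The new set_task_blockstep() above keeps the TIF_BLOCKSTEP flag and the DEBUGCTLMSR_BTF bit moving together by doing both updates in one interrupts-off region, and only writes the control MSR when the task is the one currently running. A minimal standalone sketch of that pattern, with illustrative stand-ins rather than kernel APIs:

    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t fake_debugctl;            /* models the DEBUGCTL MSR   */
    #define CTL_BTF        (1u << 1)          /* models DEBUGCTLMSR_BTF    */
    #define FLAG_BLOCKSTEP 0x1                /* models TIF_BLOCKSTEP      */

    struct demo_task { unsigned flags; };

    static void irq_disable(void) { /* stands in for local_irq_disable() */ }
    static void irq_enable(void)  { /* stands in for local_irq_enable()  */ }

    /* Flag and hardware bit are flipped back to back with interrupts off,
     * so nothing can observe them out of sync; the control word is only
     * written when the task owns the CPU. */
    static void demo_set_blockstep(struct demo_task *t, bool is_current, bool on)
    {
            uint64_t ctl;

            irq_disable();
            ctl = fake_debugctl;
            if (on) {
                    ctl |= CTL_BTF;
                    t->flags |= FLAG_BLOCKSTEP;
            } else {
                    ctl &= ~CTL_BTF;
                    t->flags &= ~FLAG_BLOCKSTEP;
            }
            if (is_current)
                    fake_debugctl = ctl;
            irq_enable();
    }

    int main(void)
    {
            struct demo_task t = { 0 };

            demo_set_blockstep(&t, true, true);   /* enable block stepping  */
            demo_set_blockstep(&t, true, false);  /* and switch it back off */
            return 0;
    }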
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c9369..8276dc6794cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
56 | #include <asm/fpu-internal.h> | 56 | #include <asm/fpu-internal.h> |
57 | #include <asm/mce.h> | 57 | #include <asm/mce.h> |
58 | #include <asm/rcu.h> | ||
58 | 59 | ||
59 | #include <asm/mach_traps.h> | 60 | #include <asm/mach_traps.h> |
60 | 61 | ||
@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
107 | dec_preempt_count(); | 108 | dec_preempt_count(); |
108 | } | 109 | } |
109 | 110 | ||
110 | static void __kprobes | 111 | static int __kprobes |
111 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | 112 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, |
112 | long error_code, siginfo_t *info) | 113 | struct pt_regs *regs, long error_code) |
113 | { | 114 | { |
114 | struct task_struct *tsk = current; | ||
115 | |||
116 | #ifdef CONFIG_X86_32 | 115 | #ifdef CONFIG_X86_32 |
117 | if (regs->flags & X86_VM_MASK) { | 116 | if (regs->flags & X86_VM_MASK) { |
118 | /* | 117 | /* |
119 | * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | 118 | * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. |
120 | * On nmi (interrupt 2), do_trap should not be called. | 119 | * On nmi (interrupt 2), do_trap should not be called. |
121 | */ | 120 | */ |
122 | if (trapnr < X86_TRAP_UD) | 121 | if (trapnr < X86_TRAP_UD) { |
123 | goto vm86_trap; | 122 | if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, |
124 | goto trap_signal; | 123 | error_code, trapnr)) |
124 | return 0; | ||
125 | } | ||
126 | return -1; | ||
125 | } | 127 | } |
126 | #endif | 128 | #endif |
129 | if (!user_mode(regs)) { | ||
130 | if (!fixup_exception(regs)) { | ||
131 | tsk->thread.error_code = error_code; | ||
132 | tsk->thread.trap_nr = trapnr; | ||
133 | die(str, regs, error_code); | ||
134 | } | ||
135 | return 0; | ||
136 | } | ||
127 | 137 | ||
128 | if (!user_mode(regs)) | 138 | return -1; |
129 | goto kernel_trap; | 139 | } |
130 | 140 | ||
131 | #ifdef CONFIG_X86_32 | 141 | static void __kprobes |
132 | trap_signal: | 142 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
133 | #endif | 143 | long error_code, siginfo_t *info) |
144 | { | ||
145 | struct task_struct *tsk = current; | ||
146 | |||
147 | |||
148 | if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) | ||
149 | return; | ||
134 | /* | 150 | /* |
135 | * We want error_code and trap_nr set for userspace faults and | 151 | * We want error_code and trap_nr set for userspace faults and |
136 | * kernelspace faults which result in die(), but not | 152 | * kernelspace faults which result in die(), but not |
@@ -158,33 +174,20 @@ trap_signal: | |||
158 | force_sig_info(signr, info, tsk); | 174 | force_sig_info(signr, info, tsk); |
159 | else | 175 | else |
160 | force_sig(signr, tsk); | 176 | force_sig(signr, tsk); |
161 | return; | ||
162 | |||
163 | kernel_trap: | ||
164 | if (!fixup_exception(regs)) { | ||
165 | tsk->thread.error_code = error_code; | ||
166 | tsk->thread.trap_nr = trapnr; | ||
167 | die(str, regs, error_code); | ||
168 | } | ||
169 | return; | ||
170 | |||
171 | #ifdef CONFIG_X86_32 | ||
172 | vm86_trap: | ||
173 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, | ||
174 | error_code, trapnr)) | ||
175 | goto trap_signal; | ||
176 | return; | ||
177 | #endif | ||
178 | } | 177 | } |
179 | 178 | ||
180 | #define DO_ERROR(trapnr, signr, str, name) \ | 179 | #define DO_ERROR(trapnr, signr, str, name) \ |
181 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | 180 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ |
182 | { \ | 181 | { \ |
183 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 182 | exception_enter(regs); \ |
184 | == NOTIFY_STOP) \ | 183 | if (notify_die(DIE_TRAP, str, regs, error_code, \ |
184 | trapnr, signr) == NOTIFY_STOP) { \ | ||
185 | exception_exit(regs); \ | ||
185 | return; \ | 186 | return; \ |
187 | } \ | ||
186 | conditional_sti(regs); \ | 188 | conditional_sti(regs); \ |
187 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | 189 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ |
190 | exception_exit(regs); \ | ||
188 | } | 191 | } |
189 | 192 | ||
190 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 193 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | |||
195 | info.si_errno = 0; \ | 198 | info.si_errno = 0; \ |
196 | info.si_code = sicode; \ | 199 | info.si_code = sicode; \ |
197 | info.si_addr = (void __user *)siaddr; \ | 200 | info.si_addr = (void __user *)siaddr; \ |
198 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 201 | exception_enter(regs); \ |
199 | == NOTIFY_STOP) \ | 202 | if (notify_die(DIE_TRAP, str, regs, error_code, \ |
203 | trapnr, signr) == NOTIFY_STOP) { \ | ||
204 | exception_exit(regs); \ | ||
200 | return; \ | 205 | return; \ |
206 | } \ | ||
201 | conditional_sti(regs); \ | 207 | conditional_sti(regs); \ |
202 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | 208 | do_trap(trapnr, signr, str, regs, error_code, &info); \ |
209 | exception_exit(regs); \ | ||
203 | } | 210 | } |
204 | 211 | ||
205 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, | 212 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, |
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, | |||
222 | /* Runs on IST stack */ | 229 | /* Runs on IST stack */ |
223 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | 230 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) |
224 | { | 231 | { |
232 | exception_enter(regs); | ||
225 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | 233 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, |
226 | X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) | 234 | X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { |
227 | return; | 235 | preempt_conditional_sti(regs); |
228 | preempt_conditional_sti(regs); | 236 | do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); |
229 | do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); | 237 | preempt_conditional_cli(regs); |
230 | preempt_conditional_cli(regs); | 238 | } |
239 | exception_exit(regs); | ||
231 | } | 240 | } |
232 | 241 | ||
233 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | 242 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
235 | static const char str[] = "double fault"; | 244 | static const char str[] = "double fault"; |
236 | struct task_struct *tsk = current; | 245 | struct task_struct *tsk = current; |
237 | 246 | ||
247 | exception_enter(regs); | ||
238 | /* Return not checked because double fault cannot be ignored */ | 248 | /* Return not checked because double fault cannot be ignored */ |
239 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 249 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
240 | 250 | ||
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
255 | { | 265 | { |
256 | struct task_struct *tsk; | 266 | struct task_struct *tsk; |
257 | 267 | ||
268 | exception_enter(regs); | ||
258 | conditional_sti(regs); | 269 | conditional_sti(regs); |
259 | 270 | ||
260 | #ifdef CONFIG_X86_32 | 271 | #ifdef CONFIG_X86_32 |
261 | if (regs->flags & X86_VM_MASK) | 272 | if (regs->flags & X86_VM_MASK) { |
262 | goto gp_in_vm86; | 273 | local_irq_enable(); |
274 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
275 | goto exit; | ||
276 | } | ||
263 | #endif | 277 | #endif |
264 | 278 | ||
265 | tsk = current; | 279 | tsk = current; |
266 | if (!user_mode(regs)) | 280 | if (!user_mode(regs)) { |
267 | goto gp_in_kernel; | 281 | if (fixup_exception(regs)) |
282 | goto exit; | ||
283 | |||
284 | tsk->thread.error_code = error_code; | ||
285 | tsk->thread.trap_nr = X86_TRAP_GP; | ||
286 | if (notify_die(DIE_GPF, "general protection fault", regs, error_code, | ||
287 | X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) | ||
288 | die("general protection fault", regs, error_code); | ||
289 | goto exit; | ||
290 | } | ||
268 | 291 | ||
269 | tsk->thread.error_code = error_code; | 292 | tsk->thread.error_code = error_code; |
270 | tsk->thread.trap_nr = X86_TRAP_GP; | 293 | tsk->thread.trap_nr = X86_TRAP_GP; |
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
279 | } | 302 | } |
280 | 303 | ||
281 | force_sig(SIGSEGV, tsk); | 304 | force_sig(SIGSEGV, tsk); |
282 | return; | 305 | exit: |
283 | 306 | exception_exit(regs); | |
284 | #ifdef CONFIG_X86_32 | ||
285 | gp_in_vm86: | ||
286 | local_irq_enable(); | ||
287 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
288 | return; | ||
289 | #endif | ||
290 | |||
291 | gp_in_kernel: | ||
292 | if (fixup_exception(regs)) | ||
293 | return; | ||
294 | |||
295 | tsk->thread.error_code = error_code; | ||
296 | tsk->thread.trap_nr = X86_TRAP_GP; | ||
297 | if (notify_die(DIE_GPF, "general protection fault", regs, error_code, | ||
298 | X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) | ||
299 | return; | ||
300 | die("general protection fault", regs, error_code); | ||
301 | } | 307 | } |
302 | 308 | ||
303 | /* May run on IST stack. */ | 309 | /* May run on IST stack. */ |
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co | |||
312 | ftrace_int3_handler(regs)) | 318 | ftrace_int3_handler(regs)) |
313 | return; | 319 | return; |
314 | #endif | 320 | #endif |
321 | exception_enter(regs); | ||
315 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 322 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
316 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 323 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
317 | SIGTRAP) == NOTIFY_STOP) | 324 | SIGTRAP) == NOTIFY_STOP) |
318 | return; | 325 | goto exit; |
319 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | 326 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ |
320 | 327 | ||
321 | if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 328 | if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
322 | SIGTRAP) == NOTIFY_STOP) | 329 | SIGTRAP) == NOTIFY_STOP) |
323 | return; | 330 | goto exit; |
324 | 331 | ||
325 | /* | 332 | /* |
326 | * Let others (NMI) know that the debug stack is in use | 333 | * Let others (NMI) know that the debug stack is in use |
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co | |||
331 | do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); | 338 | do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); |
332 | preempt_conditional_cli(regs); | 339 | preempt_conditional_cli(regs); |
333 | debug_stack_usage_dec(); | 340 | debug_stack_usage_dec(); |
341 | exit: | ||
342 | exception_exit(regs); | ||
334 | } | 343 | } |
335 | 344 | ||
336 | #ifdef CONFIG_X86_64 | 345 | #ifdef CONFIG_X86_64 |
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
391 | unsigned long dr6; | 400 | unsigned long dr6; |
392 | int si_code; | 401 | int si_code; |
393 | 402 | ||
403 | exception_enter(regs); | ||
404 | |||
394 | get_debugreg(dr6, 6); | 405 | get_debugreg(dr6, 6); |
395 | 406 | ||
396 | /* Filter out all the reserved bits which are preset to 1 */ | 407 | /* Filter out all the reserved bits which are preset to 1 */ |
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
406 | 417 | ||
407 | /* Catch kmemcheck conditions first of all! */ | 418 | /* Catch kmemcheck conditions first of all! */ |
408 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) | 419 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
409 | return; | 420 | goto exit; |
410 | 421 | ||
411 | /* DR6 may or may not be cleared by the CPU */ | 422 | /* DR6 may or may not be cleared by the CPU */ |
412 | set_debugreg(0, 6); | 423 | set_debugreg(0, 6); |
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
421 | 432 | ||
422 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | 433 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, |
423 | SIGTRAP) == NOTIFY_STOP) | 434 | SIGTRAP) == NOTIFY_STOP) |
424 | return; | 435 | goto exit; |
425 | 436 | ||
426 | /* | 437 | /* |
427 | * Let others (NMI) know that the debug stack is in use | 438 | * Let others (NMI) know that the debug stack is in use |
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
437 | X86_TRAP_DB); | 448 | X86_TRAP_DB); |
438 | preempt_conditional_cli(regs); | 449 | preempt_conditional_cli(regs); |
439 | debug_stack_usage_dec(); | 450 | debug_stack_usage_dec(); |
440 | return; | 451 | goto exit; |
441 | } | 452 | } |
442 | 453 | ||
443 | /* | 454 | /* |
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
458 | preempt_conditional_cli(regs); | 469 | preempt_conditional_cli(regs); |
459 | debug_stack_usage_dec(); | 470 | debug_stack_usage_dec(); |
460 | 471 | ||
461 | return; | 472 | exit: |
473 | exception_exit(regs); | ||
462 | } | 474 | } |
463 | 475 | ||
464 | /* | 476 | /* |
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | |||
555 | #ifdef CONFIG_X86_32 | 567 | #ifdef CONFIG_X86_32 |
556 | ignore_fpu_irq = 1; | 568 | ignore_fpu_irq = 1; |
557 | #endif | 569 | #endif |
558 | 570 | exception_enter(regs); | |
559 | math_error(regs, error_code, X86_TRAP_MF); | 571 | math_error(regs, error_code, X86_TRAP_MF); |
572 | exception_exit(regs); | ||
560 | } | 573 | } |
561 | 574 | ||
562 | dotraplinkage void | 575 | dotraplinkage void |
563 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 576 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
564 | { | 577 | { |
578 | exception_enter(regs); | ||
565 | math_error(regs, error_code, X86_TRAP_XF); | 579 | math_error(regs, error_code, X86_TRAP_XF); |
580 | exception_exit(regs); | ||
566 | } | 581 | } |
567 | 582 | ||
568 | dotraplinkage void | 583 | dotraplinkage void |
@@ -613,11 +628,12 @@ void math_state_restore(void) | |||
613 | } | 628 | } |
614 | 629 | ||
615 | __thread_fpu_begin(tsk); | 630 | __thread_fpu_begin(tsk); |
631 | |||
616 | /* | 632 | /* |
617 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | 633 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. |
618 | */ | 634 | */ |
619 | if (unlikely(restore_fpu_checking(tsk))) { | 635 | if (unlikely(restore_fpu_checking(tsk))) { |
620 | __thread_fpu_end(tsk); | 636 | drop_init_fpu(tsk); |
621 | force_sig(SIGSEGV, tsk); | 637 | force_sig(SIGSEGV, tsk); |
622 | return; | 638 | return; |
623 | } | 639 | } |
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); | |||
629 | dotraplinkage void __kprobes | 645 | dotraplinkage void __kprobes |
630 | do_device_not_available(struct pt_regs *regs, long error_code) | 646 | do_device_not_available(struct pt_regs *regs, long error_code) |
631 | { | 647 | { |
648 | exception_enter(regs); | ||
649 | BUG_ON(use_eager_fpu()); | ||
650 | |||
632 | #ifdef CONFIG_MATH_EMULATION | 651 | #ifdef CONFIG_MATH_EMULATION |
633 | if (read_cr0() & X86_CR0_EM) { | 652 | if (read_cr0() & X86_CR0_EM) { |
634 | struct math_emu_info info = { }; | 653 | struct math_emu_info info = { }; |
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
637 | 656 | ||
638 | info.regs = regs; | 657 | info.regs = regs; |
639 | math_emulate(&info); | 658 | math_emulate(&info); |
659 | exception_exit(regs); | ||
640 | return; | 660 | return; |
641 | } | 661 | } |
642 | #endif | 662 | #endif |
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
644 | #ifdef CONFIG_X86_32 | 664 | #ifdef CONFIG_X86_32 |
645 | conditional_sti(regs); | 665 | conditional_sti(regs); |
646 | #endif | 666 | #endif |
667 | exception_exit(regs); | ||
647 | } | 668 | } |
648 | 669 | ||
649 | #ifdef CONFIG_X86_32 | 670 | #ifdef CONFIG_X86_32 |
650 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | 671 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) |
651 | { | 672 | { |
652 | siginfo_t info; | 673 | siginfo_t info; |
674 | |||
675 | exception_enter(regs); | ||
653 | local_irq_enable(); | 676 | local_irq_enable(); |
654 | 677 | ||
655 | info.si_signo = SIGILL; | 678 | info.si_signo = SIGILL; |
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
657 | info.si_code = ILL_BADSTK; | 680 | info.si_code = ILL_BADSTK; |
658 | info.si_addr = NULL; | 681 | info.si_addr = NULL; |
659 | if (notify_die(DIE_TRAP, "iret exception", regs, error_code, | 682 | if (notify_die(DIE_TRAP, "iret exception", regs, error_code, |
660 | X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) | 683 | X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { |
661 | return; | 684 | do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, |
662 | do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, | 685 | &info); |
663 | &info); | 686 | } |
687 | exception_exit(regs); | ||
664 | } | 688 | } |
665 | #endif | 689 | #endif |
666 | 690 | ||
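The common thread in the traps.c hunks above is that every handler body is now bracketed by exception_enter()/exception_exit(), with early return statements rewritten as goto exit so that no path can leave the handler without the closing call; given the new <asm/rcu.h> include, this appears to serve the same user-mode bookkeeping as the rcu_user_exit()/rcu_user_enter() pair added to do_notify_resume() earlier in this commit. A self-contained sketch of that control-flow shape, using illustrative stand-ins for the enter/exit helpers:

    #include <stdbool.h>
    #include <stdio.h>

    static void context_enter(void) { puts("enter: CPU is no longer in user mode"); }
    static void context_exit(void)  { puts("exit: heading back toward user mode");  }

    /* Early exits become 'goto exit' so every path runs the matching
     * context_exit(), mirroring the rewritten trap handlers above. */
    static void demo_trap(bool stopped_by_notifier)
    {
            context_enter();
            if (stopped_by_notifier)
                    goto exit;          /* early out still reaches context_exit() */
            puts("handle the trap: fix up the exception or deliver a signal");
    exit:
            context_exit();
    }

    int main(void)
    {
            demo_trap(true);
            demo_trap(false);
            return 0;
    }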
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa7..9538f00827a9 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -41,6 +41,9 @@ | |||
41 | /* Adjust the return address of a call insn */ | 41 | /* Adjust the return address of a call insn */ |
42 | #define UPROBE_FIX_CALL 0x2 | 42 | #define UPROBE_FIX_CALL 0x2 |
43 | 43 | ||
44 | /* Instruction will modify TF, don't change it */ | ||
45 | #define UPROBE_FIX_SETF 0x4 | ||
46 | |||
44 | #define UPROBE_FIX_RIP_AX 0x8000 | 47 | #define UPROBE_FIX_RIP_AX 0x8000 |
45 | #define UPROBE_FIX_RIP_CX 0x4000 | 48 | #define UPROBE_FIX_RIP_CX 0x4000 |
46 | 49 | ||
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | |||
239 | insn_get_opcode(insn); /* should be a nop */ | 242 | insn_get_opcode(insn); /* should be a nop */ |
240 | 243 | ||
241 | switch (OPCODE1(insn)) { | 244 | switch (OPCODE1(insn)) { |
245 | case 0x9d: | ||
246 | /* popf */ | ||
247 | auprobe->fixups |= UPROBE_FIX_SETF; | ||
248 | break; | ||
242 | case 0xc3: /* ret/lret */ | 249 | case 0xc3: /* ret/lret */ |
243 | case 0xcb: | 250 | case 0xcb: |
244 | case 0xc2: | 251 | case 0xc2: |
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
646 | * Skip these instructions as per the currently known x86 ISA. | 653 | * Skip these instructions as per the currently known x86 ISA. |
647 | * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } | 654 | * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } |
648 | */ | 655 | */ |
649 | bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | 656 | static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) |
650 | { | 657 | { |
651 | int i; | 658 | int i; |
652 | 659 | ||
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
673 | } | 680 | } |
674 | return false; | 681 | return false; |
675 | } | 682 | } |
683 | |||
684 | bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
685 | { | ||
686 | bool ret = __skip_sstep(auprobe, regs); | ||
687 | if (ret && (regs->flags & X86_EFLAGS_TF)) | ||
688 | send_sig(SIGTRAP, current, 0); | ||
689 | return ret; | ||
690 | } | ||
691 | |||
692 | void arch_uprobe_enable_step(struct arch_uprobe *auprobe) | ||
693 | { | ||
694 | struct task_struct *task = current; | ||
695 | struct arch_uprobe_task *autask = &task->utask->autask; | ||
696 | struct pt_regs *regs = task_pt_regs(task); | ||
697 | |||
698 | autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); | ||
699 | |||
700 | regs->flags |= X86_EFLAGS_TF; | ||
701 | if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) | ||
702 | set_task_blockstep(task, false); | ||
703 | } | ||
704 | |||
705 | void arch_uprobe_disable_step(struct arch_uprobe *auprobe) | ||
706 | { | ||
707 | struct task_struct *task = current; | ||
708 | struct arch_uprobe_task *autask = &task->utask->autask; | ||
709 | bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); | ||
710 | struct pt_regs *regs = task_pt_regs(task); | ||
711 | /* | ||
712 | * The state of TIF_BLOCKSTEP was not saved so we can get an extra | ||
713 | * SIGTRAP if we do not clear TF. We need to examine the opcode to | ||
714 | * make it right. | ||
715 | */ | ||
716 | if (unlikely(trapped)) { | ||
717 | if (!autask->saved_tf) | ||
718 | regs->flags &= ~X86_EFLAGS_TF; | ||
719 | } else { | ||
720 | if (autask->saved_tf) | ||
721 | send_sig(SIGTRAP, task, 0); | ||
722 | else if (!(auprobe->fixups & UPROBE_FIX_SETF)) | ||
723 | regs->flags &= ~X86_EFLAGS_TF; | ||
724 | } | ||
725 | } | ||
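arch_uprobe_disable_step() above picks its outcome from three pieces of state: the saved TF bit, the UPROBE_FIX_SETF fixup (set for popf, which rewrites TF itself) and whether the step ended in UTASK_SSTEP_TRAPPED. The same decision restated as a small pure function, purely as an illustration of the logic (the names are invented, this is not kernel code):

    #include <stdbool.h>

    struct step_result {
            bool clear_tf;        /* take X86_EFLAGS_TF back out of the regs */
            bool send_sigtrap;    /* report the trap the tracee expected     */
    };

    /* trapped      - the step ended in UTASK_SSTEP_TRAPPED
     * saved_tf     - TF was already set before the uprobe was hit
     * insn_sets_tf - the instruction writes TF itself (UPROBE_FIX_SETF)    */
    static struct step_result demo_disable_step(bool trapped, bool saved_tf,
                                                bool insn_sets_tf)
    {
            struct step_result r = { false, false };

            if (trapped) {
                    if (!saved_tf)
                            r.clear_tf = true;
            } else {
                    if (saved_tf)
                            r.send_sigtrap = true;
                    else if (!insn_sets_tf)
                            r.clear_tf = true;
            }
            return r;
    }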
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927c..1330dd102950 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -13,9 +13,13 @@ | |||
13 | #include <asm/ftrace.h> | 13 | #include <asm/ftrace.h> |
14 | 14 | ||
15 | #ifdef CONFIG_FUNCTION_TRACER | 15 | #ifdef CONFIG_FUNCTION_TRACER |
16 | /* mcount is defined in assembly */ | 16 | /* mcount and __fentry__ are defined in assembly */ |
17 | #ifdef CC_USING_FENTRY | ||
18 | EXPORT_SYMBOL(__fentry__); | ||
19 | #else | ||
17 | EXPORT_SYMBOL(mcount); | 20 | EXPORT_SYMBOL(mcount); |
18 | #endif | 21 | #endif |
22 | #endif | ||
19 | 23 | ||
20 | EXPORT_SYMBOL(__get_user_1); | 24 | EXPORT_SYMBOL(__get_user_1); |
21 | EXPORT_SYMBOL(__get_user_2); | 25 | EXPORT_SYMBOL(__get_user_2); |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9f3167e891ef..7a3d075a814a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -26,7 +26,6 @@ | |||
26 | 26 | ||
27 | void __cpuinit x86_init_noop(void) { } | 27 | void __cpuinit x86_init_noop(void) { } |
28 | void __init x86_init_uint_noop(unsigned int unused) { } | 28 | void __init x86_init_uint_noop(unsigned int unused) { } |
29 | void __init x86_init_pgd_noop(pgd_t *unused) { } | ||
30 | int __init iommu_init_noop(void) { return 0; } | 29 | int __init iommu_init_noop(void) { return 0; } |
31 | void iommu_shutdown_noop(void) { } | 30 | void iommu_shutdown_noop(void) { } |
32 | 31 | ||
@@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = { | |||
68 | }, | 67 | }, |
69 | 68 | ||
70 | .paging = { | 69 | .paging = { |
71 | .pagetable_setup_start = native_pagetable_setup_start, | 70 | .pagetable_init = native_pagetable_init, |
72 | .pagetable_setup_done = native_pagetable_setup_done, | ||
73 | }, | 71 | }, |
74 | 72 | ||
75 | .timers = { | 73 | .timers = { |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e20709119..ada87a329edc 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -10,9 +10,7 @@ | |||
10 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
11 | #include <asm/i387.h> | 11 | #include <asm/i387.h> |
12 | #include <asm/fpu-internal.h> | 12 | #include <asm/fpu-internal.h> |
13 | #ifdef CONFIG_IA32_EMULATION | 13 | #include <asm/sigframe.h> |
14 | #include <asm/sigcontext32.h> | ||
15 | #endif | ||
16 | #include <asm/xcr.h> | 14 | #include <asm/xcr.h> |
17 | 15 | ||
18 | /* | 16 | /* |
@@ -23,13 +21,9 @@ u64 pcntxt_mask; | |||
23 | /* | 21 | /* |
24 | * Represents init state for the supported extended state. | 22 | * Represents init state for the supported extended state. |
25 | */ | 23 | */ |
26 | static struct xsave_struct *init_xstate_buf; | 24 | struct xsave_struct *init_xstate_buf; |
27 | |||
28 | struct _fpx_sw_bytes fx_sw_reserved; | ||
29 | #ifdef CONFIG_IA32_EMULATION | ||
30 | struct _fpx_sw_bytes fx_sw_reserved_ia32; | ||
31 | #endif | ||
32 | 25 | ||
26 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | ||
33 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | 27 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; |
34 | 28 | ||
35 | /* | 29 | /* |
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | |||
44 | */ | 38 | */ |
45 | void __sanitize_i387_state(struct task_struct *tsk) | 39 | void __sanitize_i387_state(struct task_struct *tsk) |
46 | { | 40 | { |
47 | u64 xstate_bv; | ||
48 | int feature_bit = 0x2; | ||
49 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | 41 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
42 | int feature_bit = 0x2; | ||
43 | u64 xstate_bv; | ||
50 | 44 | ||
51 | if (!fx) | 45 | if (!fx) |
52 | return; | 46 | return; |
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
104 | * Check for the presence of extended state information in the | 98 | * Check for the presence of extended state information in the |
105 | * user fpstate pointer in the sigcontext. | 99 | * user fpstate pointer in the sigcontext. |
106 | */ | 100 | */ |
107 | int check_for_xstate(struct i387_fxsave_struct __user *buf, | 101 | static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, |
108 | void __user *fpstate, | 102 | void __user *fpstate, |
109 | struct _fpx_sw_bytes *fx_sw_user) | 103 | struct _fpx_sw_bytes *fx_sw) |
110 | { | 104 | { |
111 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + | 105 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + |
112 | sizeof(struct xsave_hdr_struct); | 106 | sizeof(struct xsave_hdr_struct); |
113 | unsigned int magic2; | 107 | unsigned int magic2; |
114 | int err; | ||
115 | 108 | ||
116 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], | 109 | if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) |
117 | sizeof(struct _fpx_sw_bytes)); | 110 | return -1; |
118 | if (err) | ||
119 | return -EFAULT; | ||
120 | 111 | ||
121 | /* | 112 | /* Check for the first magic field and other error scenarios. */ |
122 | * First Magic check failed. | 113 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || |
123 | */ | 114 | fx_sw->xstate_size < min_xstate_size || |
124 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) | 115 | fx_sw->xstate_size > xstate_size || |
125 | return -EINVAL; | 116 | fx_sw->xstate_size > fx_sw->extended_size) |
117 | return -1; | ||
126 | 118 | ||
127 | /* | 119 | /* |
128 | * Check for error scenarios. | ||
129 | */ | ||
130 | if (fx_sw_user->xstate_size < min_xstate_size || | ||
131 | fx_sw_user->xstate_size > xstate_size || | ||
132 | fx_sw_user->xstate_size > fx_sw_user->extended_size) | ||
133 | return -EINVAL; | ||
134 | |||
135 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + | ||
136 | fx_sw_user->extended_size - | ||
137 | FP_XSTATE_MAGIC2_SIZE)); | ||
138 | if (err) | ||
139 | return err; | ||
140 | /* | ||
141 | * Check for the presence of second magic word at the end of memory | 120 | * Check for the presence of second magic word at the end of memory |
142 | * layout. This detects the case where the user just copied the legacy | 121 | * layout. This detects the case where the user just copied the legacy |
143 | * fpstate layout with out copying the extended state information | 122 | * fpstate layout with out copying the extended state information |
144 | * in the memory layout. | 123 | * in the memory layout. |
145 | */ | 124 | */ |
146 | if (magic2 != FP_XSTATE_MAGIC2) | 125 | if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) |
147 | return -EFAULT; | 126 | || magic2 != FP_XSTATE_MAGIC2) |
127 | return -1; | ||
148 | 128 | ||
149 | return 0; | 129 | return 0; |
150 | } | 130 | } |
151 | 131 | ||
152 | #ifdef CONFIG_X86_64 | ||
153 | /* | 132 | /* |
154 | * Signal frame handlers. | 133 | * Signal frame handlers. |
155 | */ | 134 | */ |
156 | 135 | static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) | |
157 | int save_i387_xstate(void __user *buf) | ||
158 | { | 136 | { |
159 | struct task_struct *tsk = current; | 137 | if (use_fxsr()) { |
160 | int err = 0; | 138 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
161 | 139 | struct user_i387_ia32_struct env; | |
162 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) | 140 | struct _fpstate_ia32 __user *fp = buf; |
163 | return -EACCES; | ||
164 | 141 | ||
165 | BUG_ON(sig_xstate_size < xstate_size); | 142 | convert_from_fxsr(&env, tsk); |
166 | 143 | ||
167 | if ((unsigned long)buf % 64) | 144 | if (__copy_to_user(buf, &env, sizeof(env)) || |
168 | pr_err("%s: bad fpstate %p\n", __func__, buf); | 145 | __put_user(xsave->i387.swd, &fp->status) || |
169 | 146 | __put_user(X86_FXSR_MAGIC, &fp->magic)) | |
170 | if (!used_math()) | 147 | return -1; |
171 | return 0; | ||
172 | |||
173 | if (user_has_fpu()) { | ||
174 | if (use_xsave()) | ||
175 | err = xsave_user(buf); | ||
176 | else | ||
177 | err = fxsave_user(buf); | ||
178 | |||
179 | if (err) | ||
180 | return err; | ||
181 | user_fpu_end(); | ||
182 | } else { | 148 | } else { |
183 | sanitize_i387_state(tsk); | 149 | struct i387_fsave_struct __user *fp = buf; |
184 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, | 150 | u32 swd; |
185 | xstate_size)) | 151 | if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) |
186 | return -1; | 152 | return -1; |
187 | } | 153 | } |
188 | 154 | ||
189 | clear_used_math(); /* trigger finit */ | 155 | return 0; |
156 | } | ||
190 | 157 | ||
191 | if (use_xsave()) { | 158 | static inline int save_xstate_epilog(void __user *buf, int ia32_frame) |
192 | struct _fpstate __user *fx = buf; | 159 | { |
193 | struct _xstate __user *x = buf; | 160 | struct xsave_struct __user *x = buf; |
194 | u64 xstate_bv; | 161 | struct _fpx_sw_bytes *sw_bytes; |
162 | u32 xstate_bv; | ||
163 | int err; | ||
195 | 164 | ||
196 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, | 165 | /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ |
197 | sizeof(struct _fpx_sw_bytes)); | 166 | sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; |
167 | err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); | ||
198 | 168 | ||
199 | err |= __put_user(FP_XSTATE_MAGIC2, | 169 | if (!use_xsave()) |
200 | (__u32 __user *) (buf + sig_xstate_size | 170 | return err; |
201 | - FP_XSTATE_MAGIC2_SIZE)); | ||
202 | 171 | ||
203 | /* | 172 | err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); |
204 | * Read the xstate_bv which we copied (directly from the cpu or | ||
205 | * from the state in task struct) to the user buffers and | ||
206 | * set the FP/SSE bits. | ||
207 | */ | ||
208 | err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); | ||
209 | 173 | ||
210 | /* | 174 | /* |
211 | * For legacy compatible, we always set FP/SSE bits in the bit | 175 | * Read the xstate_bv which we copied (directly from the cpu or |
212 | * vector while saving the state to the user context. This will | 176 | * from the state in task struct) to the user buffers. |
213 | * enable us capturing any changes(during sigreturn) to | 177 | */ |
214 | * the FP/SSE bits by the legacy applications which don't touch | 178 | err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); |
215 | * xstate_bv in the xsave header. | ||
216 | * | ||
217 | * xsave aware apps can change the xstate_bv in the xsave | ||
218 | * header as well as change any contents in the memory layout. | ||
219 | * xrestore as part of sigreturn will capture all the changes. | ||
220 | */ | ||
221 | xstate_bv |= XSTATE_FPSSE; | ||
222 | 179 | ||
223 | err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); | 180 | /* |
181 | * For legacy compatible, we always set FP/SSE bits in the bit | ||
182 | * vector while saving the state to the user context. This will | ||
183 | * enable us capturing any changes(during sigreturn) to | ||
184 | * the FP/SSE bits by the legacy applications which don't touch | ||
185 | * xstate_bv in the xsave header. | ||
186 | * | ||
187 | * xsave aware apps can change the xstate_bv in the xsave | ||
188 | * header as well as change any contents in the memory layout. | ||
189 | * xrestore as part of sigreturn will capture all the changes. | ||
190 | */ | ||
191 | xstate_bv |= XSTATE_FPSSE; | ||
224 | 192 | ||
225 | if (err) | 193 | err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); |
226 | return err; | ||
227 | } | ||
228 | 194 | ||
229 | return 1; | 195 | return err; |
196 | } | ||
197 | |||
198 | static inline int save_user_xstate(struct xsave_struct __user *buf) | ||
199 | { | ||
200 | int err; | ||
201 | |||
202 | if (use_xsave()) | ||
203 | err = xsave_user(buf); | ||
204 | else if (use_fxsr()) | ||
205 | err = fxsave_user((struct i387_fxsave_struct __user *) buf); | ||
206 | else | ||
207 | err = fsave_user((struct i387_fsave_struct __user *) buf); | ||
208 | |||
209 | if (unlikely(err) && __clear_user(buf, xstate_size)) | ||
210 | err = -EFAULT; | ||
211 | return err; | ||
230 | } | 212 | } |
231 | 213 | ||
232 | /* | 214 | /* |
233 | * Restore the extended state if present. Otherwise, restore the FP/SSE | 215 | * Save the fpu, extended register state to the user signal frame. |
234 | * state. | 216 | * |
217 | * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save | ||
218 | * state is copied. | ||
219 | * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. | ||
220 | * | ||
221 | * buf == buf_fx for 64-bit frames and 32-bit fsave frame. | ||
222 | * buf != buf_fx for 32-bit frames with fxstate. | ||
223 | * | ||
224 | * If the fpu, extended register state is live, save the state directly | ||
225 | * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, | ||
226 | * copy the thread's fpu state to the user frame starting at 'buf_fx'. | ||
227 | * | ||
228 | * If this is a 32-bit frame with fxstate, put a fsave header before | ||
229 | * the aligned state at 'buf_fx'. | ||
230 | * | ||
231 | * For [f]xsave state, update the SW reserved fields in the [f]xsave frame | ||
232 | * indicating the absence/presence of the extended state to the user. | ||
235 | */ | 233 | */ |
236 | static int restore_user_xstate(void __user *buf) | 234 | int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) |
237 | { | 235 | { |
238 | struct _fpx_sw_bytes fx_sw_user; | 236 | struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; |
239 | u64 mask; | 237 | struct task_struct *tsk = current; |
240 | int err; | 238 | int ia32_fxstate = (buf != buf_fx); |
241 | 239 | ||
242 | if (((unsigned long)buf % 64) || | 240 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || |
243 | check_for_xstate(buf, buf, &fx_sw_user)) | 241 | config_enabled(CONFIG_IA32_EMULATION)); |
244 | goto fx_only; | ||
245 | 242 | ||
246 | mask = fx_sw_user.xstate_bv; | 243 | if (!access_ok(VERIFY_WRITE, buf, size)) |
244 | return -EACCES; | ||
247 | 245 | ||
248 | /* | 246 | if (!HAVE_HWFP) |
249 | * restore the state passed by the user. | 247 | return fpregs_soft_get(current, NULL, 0, |
250 | */ | 248 | sizeof(struct user_i387_ia32_struct), NULL, |
251 | err = xrestore_user(buf, mask); | 249 | (struct _fpstate_ia32 __user *) buf) ? -1 : 1; |
252 | if (err) | ||
253 | return err; | ||
254 | 250 | ||
255 | /* | 251 | if (user_has_fpu()) { |
256 | * init the state skipped by the user. | 252 | /* Save the live register state to the user directly. */ |
257 | */ | 253 | if (save_user_xstate(buf_fx)) |
258 | mask = pcntxt_mask & ~mask; | 254 | return -1; |
259 | if (unlikely(mask)) | 255 | /* Update the thread's fxstate to save the fsave header. */ |
260 | xrstor_state(init_xstate_buf, mask); | 256 | if (ia32_fxstate) |
257 | fpu_fxsave(&tsk->thread.fpu); | ||
258 | } else { | ||
259 | sanitize_i387_state(tsk); | ||
260 | if (__copy_to_user(buf_fx, xsave, xstate_size)) | ||
261 | return -1; | ||
262 | } | ||
263 | |||
264 | /* Save the fsave header for the 32-bit frames. */ | ||
265 | if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) | ||
266 | return -1; | ||
267 | |||
268 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) | ||
269 | return -1; | ||
270 | |||
271 | drop_init_fpu(tsk); /* trigger finit */ | ||
261 | 272 | ||
262 | return 0; | 273 | return 0; |
274 | } | ||
263 | 275 | ||
264 | fx_only: | 276 | static inline void |
265 | /* | 277 | sanitize_restored_xstate(struct task_struct *tsk, |
266 | * couldn't find the extended state information in the | 278 | struct user_i387_ia32_struct *ia32_env, |
267 | * memory layout. Restore just the FP/SSE and init all | 279 | u64 xstate_bv, int fx_only) |
268 | * the other extended state. | 280 | { |
269 | */ | 281 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
270 | xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); | 282 | struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; |
271 | return fxrstor_checking((__force struct i387_fxsave_struct *)buf); | 283 | |
284 | if (use_xsave()) { | ||
285 | /* These bits must be zero. */ | ||
286 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
287 | |||
288 | /* | ||
289 | * Init the state that is not present in the memory | ||
290 | * layout and not enabled by the OS. | ||
291 | */ | ||
292 | if (fx_only) | ||
293 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
294 | else | ||
295 | xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); | ||
296 | } | ||
297 | |||
298 | if (use_fxsr()) { | ||
299 | /* | ||
300 | * mxcsr reserved bits must be masked to zero for security | ||
301 | * reasons. | ||
302 | */ | ||
303 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
304 | |||
305 | convert_to_fxsr(tsk, ia32_env); | ||
306 | } | ||
272 | } | 307 | } |
273 | 308 | ||
274 | /* | 309 | /* |
275 | * This restores directly out of user space. Exceptions are handled. | 310 | * Restore the extended state if present. Otherwise, restore the FP/SSE state. |
276 | */ | 311 | */ |
277 | int restore_i387_xstate(void __user *buf) | 312 | static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) |
278 | { | 313 | { |
314 | if (use_xsave()) { | ||
315 | if ((unsigned long)buf % 64 || fx_only) { | ||
316 | u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; | ||
317 | xrstor_state(init_xstate_buf, init_bv); | ||
318 | return fxrstor_user(buf); | ||
319 | } else { | ||
320 | u64 init_bv = pcntxt_mask & ~xbv; | ||
321 | if (unlikely(init_bv)) | ||
322 | xrstor_state(init_xstate_buf, init_bv); | ||
323 | return xrestore_user(buf, xbv); | ||
324 | } | ||
325 | } else if (use_fxsr()) { | ||
326 | return fxrstor_user(buf); | ||
327 | } else | ||
328 | return frstor_user(buf); | ||
329 | } | ||
330 | |||
331 | int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | ||
332 | { | ||
333 | int ia32_fxstate = (buf != buf_fx); | ||
279 | struct task_struct *tsk = current; | 334 | struct task_struct *tsk = current; |
280 | int err = 0; | 335 | int state_size = xstate_size; |
336 | u64 xstate_bv = 0; | ||
337 | int fx_only = 0; | ||
338 | |||
339 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
340 | config_enabled(CONFIG_IA32_EMULATION)); | ||
281 | 341 | ||
282 | if (!buf) { | 342 | if (!buf) { |
283 | if (used_math()) | 343 | drop_init_fpu(tsk); |
284 | goto clear; | ||
285 | return 0; | 344 | return 0; |
286 | } else | 345 | } |
287 | if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) | ||
288 | return -EACCES; | ||
289 | 346 | ||
290 | if (!used_math()) { | 347 | if (!access_ok(VERIFY_READ, buf, size)) |
291 | err = init_fpu(tsk); | 348 | return -EACCES; |
292 | if (err) | 349 | |
293 | return err; | 350 | if (!used_math() && init_fpu(tsk)) |
351 | return -1; | ||
352 | |||
353 | if (!HAVE_HWFP) { | ||
354 | return fpregs_soft_set(current, NULL, | ||
355 | 0, sizeof(struct user_i387_ia32_struct), | ||
356 | NULL, buf) != 0; | ||
294 | } | 357 | } |
295 | 358 | ||
296 | user_fpu_begin(); | 359 | if (use_xsave()) { |
297 | if (use_xsave()) | 360 | struct _fpx_sw_bytes fx_sw_user; |
298 | err = restore_user_xstate(buf); | 361 | if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { |
299 | else | 362 | /* |
300 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 363 | * Couldn't find the extended state information in the |
301 | buf); | 364 | * memory layout. Restore just the FP/SSE and init all |
302 | if (unlikely(err)) { | 365 | * the other extended state. |
366 | */ | ||
367 | state_size = sizeof(struct i387_fxsave_struct); | ||
368 | fx_only = 1; | ||
369 | } else { | ||
370 | state_size = fx_sw_user.xstate_size; | ||
371 | xstate_bv = fx_sw_user.xstate_bv; | ||
372 | } | ||
373 | } | ||
374 | |||
375 | if (ia32_fxstate) { | ||
376 | /* | ||
377 | * For 32-bit frames with fxstate, copy the user state to the | ||
378 | * thread's fpu state, reconstruct fxstate from the fsave | ||
379 | * header. Sanitize the copied state etc. | ||
380 | */ | ||
381 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; | ||
382 | struct user_i387_ia32_struct env; | ||
383 | int err = 0; | ||
384 | |||
385 | /* | ||
386 | * Drop the current fpu, which clears used_math(). This ensures | ||
387 | * that a context switch during the copy of the new state cannot | ||
388 | * save or restore a half-copied intermediate state, which would | ||
389 | * otherwise corrupt the newly restored state. | ||
390 | * We will be ready to restore/save the state only after | ||
391 | * set_used_math() is again set. | ||
392 | */ | ||
393 | drop_fpu(tsk); | ||
394 | |||
395 | if (__copy_from_user(xsave, buf_fx, state_size) || | ||
396 | __copy_from_user(&env, buf, sizeof(env))) { | ||
397 | err = -1; | ||
398 | } else { | ||
399 | sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); | ||
400 | set_used_math(); | ||
401 | } | ||
402 | |||
403 | if (use_eager_fpu()) | ||
404 | math_state_restore(); | ||
405 | |||
406 | return err; | ||
407 | } else { | ||
303 | /* | 408 | /* |
304 | * Encountered an error while doing the restore from the | 409 | * For 64-bit frames and 32-bit fsave frames, restore the user |
305 | * user buffer, clear the fpu state. | 410 | * state to the registers directly (with exceptions handled). |
306 | */ | 411 | */ |
307 | clear: | 412 | user_fpu_begin(); |
308 | clear_fpu(tsk); | 413 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { |
309 | clear_used_math(); | 414 | drop_init_fpu(tsk); |
415 | return -1; | ||
416 | } | ||
310 | } | 417 | } |
311 | return err; | 418 | |
419 | return 0; | ||
312 | } | 420 | } |
313 | #endif | ||
314 | 421 | ||
315 | /* | 422 | /* |
316 | * Prepare the SW reserved portion of the fxsave memory layout, indicating | 423 | * Prepare the SW reserved portion of the fxsave memory layout, indicating |
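The buf/buf_fx split described in the save_xstate_sig() comment above is easier to see laid out. The summary below is an illustrative sketch assembled from that comment, save_xstate_epilog() and the rewritten check_for_xstate(); it is not a kernel header:

    /*
     * 64-bit frames and 32-bit fsave frames (buf == buf_fx):
     *
     *   buf_fx (64-byte aligned) -> [f|fx|x]save image, xstate_size bytes
     *                               FP_XSTATE_MAGIC2     (xsave case only)
     *
     * 32-bit frames carrying fxstate (buf != buf_fx):
     *
     *   buf                      -> legacy i387 fsave header
     *   buf_fx (64-byte aligned) -> fxsave/xsave image, xstate_size bytes
     *                               FP_XSTATE_MAGIC2     (xsave case only)
     *
     * save_xstate_epilog() writes the magic word at buf_fx + xstate_size,
     * and check_for_xstate() verifies it at buf_fx + fx_sw->xstate_size,
     * which is how a legacy-only copy of the frame is detected on return.
     */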
@@ -321,31 +428,22 @@ clear: | |||
321 | */ | 428 | */ |
322 | static void prepare_fx_sw_frame(void) | 429 | static void prepare_fx_sw_frame(void) |
323 | { | 430 | { |
324 | int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + | 431 | int fsave_header_size = sizeof(struct i387_fsave_struct); |
325 | FP_XSTATE_MAGIC2_SIZE; | 432 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; |
326 | 433 | ||
327 | sig_xstate_size = sizeof(struct _fpstate) + size_extended; | 434 | if (config_enabled(CONFIG_X86_32)) |
328 | 435 | size += fsave_header_size; | |
329 | #ifdef CONFIG_IA32_EMULATION | ||
330 | sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; | ||
331 | #endif | ||
332 | |||
333 | memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); | ||
334 | 436 | ||
335 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | 437 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; |
336 | fx_sw_reserved.extended_size = sig_xstate_size; | 438 | fx_sw_reserved.extended_size = size; |
337 | fx_sw_reserved.xstate_bv = pcntxt_mask; | 439 | fx_sw_reserved.xstate_bv = pcntxt_mask; |
338 | fx_sw_reserved.xstate_size = xstate_size; | 440 | fx_sw_reserved.xstate_size = xstate_size; |
339 | #ifdef CONFIG_IA32_EMULATION | ||
340 | memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, | ||
341 | sizeof(struct _fpx_sw_bytes)); | ||
342 | fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; | ||
343 | #endif | ||
344 | } | ||
345 | 441 | ||
346 | #ifdef CONFIG_X86_64 | 442 | if (config_enabled(CONFIG_IA32_EMULATION)) { |
347 | unsigned int sig_xstate_size = sizeof(struct _fpstate); | 443 | fx_sw_reserved_ia32 = fx_sw_reserved; |
348 | #endif | 444 | fx_sw_reserved_ia32.extended_size += fsave_header_size; |
445 | } | ||
446 | } | ||
349 | 447 | ||
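
The rewritten prepare_fx_sw_frame() derives a single extended frame size from xstate_size: the xsave image plus the trailing FP_XSTATE_MAGIC2 word, with the legacy fsave header added on top for 32-bit kernels and, separately, for 32-bit frames under IA32 emulation. A small standalone sketch of the same arithmetic follows; the constants are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

/* Illustrative values only; the real ones come from the structure layouts
 * and the CPUID-reported xstate size. */
#define FSAVE_HEADER_SIZE      112  /* sizeof(struct i387_fsave_struct) */
#define FP_XSTATE_MAGIC2_SIZE  4    /* trailing magic word */

static unsigned int extended_frame_size(unsigned int xstate_size,
                                        int is_32bit_frame)
{
    unsigned int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;

    /* 32-bit signal frames carry an fsave header in front of the
     * fxsave/xsave image, so the advertised size grows by that much. */
    if (is_32bit_frame)
        size += FSAVE_HEADER_SIZE;
    return size;
}

int main(void)
{
    unsigned int xstate_size = 832;  /* example: FP/SSE + header + AVX */

    printf("64-bit frame: %u bytes\n", extended_frame_size(xstate_size, 0));
    printf("32-bit frame: %u bytes\n", extended_frame_size(xstate_size, 1));
    return 0;
}
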
350 | /* | 448 | /* |
351 | * Enable the extended processor state save/restore feature | 449 | * Enable the extended processor state save/restore feature |
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void) | |||
384 | /* | 482 | /* |
385 | * setup the xstate image representing the init state | 483 | * setup the xstate image representing the init state |
386 | */ | 484 | */ |
387 | static void __init setup_xstate_init(void) | 485 | static void __init setup_init_fpu_buf(void) |
388 | { | 486 | { |
389 | setup_xstate_features(); | ||
390 | |||
391 | /* | 487 | /* |
392 | * Setup init_xstate_buf to represent the init state of | 488 | * Setup init_xstate_buf to represent the init state of |
393 | * all the features managed by the xsave | 489 | * all the features managed by the xsave |
394 | */ | 490 | */ |
395 | init_xstate_buf = alloc_bootmem_align(xstate_size, | 491 | init_xstate_buf = alloc_bootmem_align(xstate_size, |
396 | __alignof__(struct xsave_struct)); | 492 | __alignof__(struct xsave_struct)); |
397 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 493 | fx_finit(&init_xstate_buf->i387); |
494 | |||
495 | if (!cpu_has_xsave) | ||
496 | return; | ||
497 | |||
498 | setup_xstate_features(); | ||
398 | 499 | ||
399 | clts(); | ||
400 | /* | 500 | /* |
401 | * Init all the features state with header_bv being 0x0 | 501 | * Init all the features state with header_bv being 0x0 |
402 | */ | 502 | */ |
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void) | |||
406 | * of any feature which is not represented by all zeros. | 506 | * of any feature which is not represented by all zeros. |
407 | */ | 507 | */ |
408 | xsave_state(init_xstate_buf, -1); | 508 | xsave_state(init_xstate_buf, -1); |
409 | stts(); | ||
410 | } | 509 | } |
411 | 510 | ||
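
setup_init_fpu_buf() now allocates the init-state buffer unconditionally, seeds the legacy FP/SSE defaults via fx_finit(), and only when XSAVE is available records the per-feature offsets and snapshots the processor's init state into the buffer (the clts()/stts() pair around the snapshot is also gone). A rough userspace analogue of allocating an aligned init image and seeding defaults is sketched below; the structure, its size, and the helpers are assumptions, though the 64-byte alignment and the default MXCSR value mirror the x86 conventions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define XSTATE_ALIGN   64       /* xsave images must be 64-byte aligned */
#define MXCSR_DEFAULT  0x1f80   /* power-on MXCSR value */

struct fake_xsave { unsigned char image[832]; };  /* illustrative size */

static struct fake_xsave *init_buf;

/* Rough stand-in for alloc_bootmem_align() + fx_finit(): obtain an aligned
 * buffer and zero it; fx_finit() would additionally set the x87 control
 * word and MXCSR_DEFAULT, and a real xsave_state() snapshot would follow
 * when the CPU supports XSAVE. */
static int setup_init_fpu_buf(size_t xstate_size)
{
    init_buf = aligned_alloc(XSTATE_ALIGN, xstate_size);  /* size is a multiple of 64 */
    if (!init_buf)
        return -1;
    memset(init_buf, 0, xstate_size);
    return 0;
}

int main(void)
{
    if (setup_init_fpu_buf(sizeof(struct fake_xsave)))
        return 1;
    printf("init buffer at %p\n", (void *)init_buf);
    free(init_buf);
    return 0;
}
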
511 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | ||
512 | static int __init eager_fpu_setup(char *s) | ||
513 | { | ||
514 | if (!strcmp(s, "on")) | ||
515 | eagerfpu = ENABLE; | ||
516 | else if (!strcmp(s, "off")) | ||
517 | eagerfpu = DISABLE; | ||
518 | else if (!strcmp(s, "auto")) | ||
519 | eagerfpu = AUTO; | ||
520 | return 1; | ||
521 | } | ||
522 | __setup("eagerfpu=", eager_fpu_setup); | ||
523 | |||
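
The new eagerfpu= boot parameter is a three-state switch: "on" forces eager FPU state handling, "off" forces the lazy model, and "auto" (the default) lets the kernel decide, which, as the following hunk shows, means enabling it whenever XSAVEOPT is present. A self-contained sketch of the same string handling, outside the kernel's __setup() machinery:

#include <stdio.h>
#include <string.h>

enum eagerfpu_mode { AUTO, ENABLE, DISABLE };

/* Mirrors the comparisons in eager_fpu_setup(); unrecognized values leave
 * the previous mode in place, as in the kernel. */
static enum eagerfpu_mode parse_eagerfpu(const char *s, enum eagerfpu_mode cur)
{
    if (!strcmp(s, "on"))
        return ENABLE;
    if (!strcmp(s, "off"))
        return DISABLE;
    if (!strcmp(s, "auto"))
        return AUTO;
    return cur;
}

int main(void)
{
    enum eagerfpu_mode mode = AUTO;

    mode = parse_eagerfpu("off", mode);
    printf("eagerfpu mode: %d\n", mode);  /* 2 == DISABLE */
    return 0;
}
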
412 | /* | 524 | /* |
413 | * Enable and initialize the xsave feature. | 525 | * Enable and initialize the xsave feature. |
414 | */ | 526 | */ |
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void) | |||
445 | 557 | ||
446 | update_regset_xstate_info(xstate_size, pcntxt_mask); | 558 | update_regset_xstate_info(xstate_size, pcntxt_mask); |
447 | prepare_fx_sw_frame(); | 559 | prepare_fx_sw_frame(); |
560 | setup_init_fpu_buf(); | ||
448 | 561 | ||
449 | setup_xstate_init(); | 562 | /* Auto enable eagerfpu for xsaveopt */ |
563 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | ||
564 | eagerfpu = ENABLE; | ||
450 | 565 | ||
451 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 566 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
452 | pcntxt_mask, xstate_size); | 567 | pcntxt_mask, xstate_size); |
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void) | |||
471 | next_func = xstate_enable; | 586 | next_func = xstate_enable; |
472 | this_func(); | 587 | this_func(); |
473 | } | 588 | } |
589 | |||
590 | static inline void __init eager_fpu_init_bp(void) | ||
591 | { | ||
592 | current->thread.fpu.state = | ||
593 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | ||
594 | if (!init_xstate_buf) | ||
595 | setup_init_fpu_buf(); | ||
596 | } | ||
597 | |||
598 | void __cpuinit eager_fpu_init(void) | ||
599 | { | ||
600 | static __refdata void (*boot_func)(void) = eager_fpu_init_bp; | ||
601 | |||
602 | clear_used_math(); | ||
603 | current_thread_info()->status = 0; | ||
604 | |||
605 | if (eagerfpu == ENABLE) | ||
606 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
607 | |||
608 | if (!cpu_has_eager_fpu) { | ||
609 | stts(); | ||
610 | return; | ||
611 | } | ||
612 | |||
613 | if (boot_func) { | ||
614 | boot_func(); | ||
615 | boot_func = NULL; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * This is the same as math_state_restore(), but use_xsave() has | ||
620 | * not been patched yet, so math_state_restore() cannot be used here. | ||
621 | */ | ||
622 | init_fpu(current); | ||
623 | __thread_fpu_begin(current); | ||
624 | if (cpu_has_xsave) | ||
625 | xrstor_state(init_xstate_buf, -1); | ||
626 | else | ||
627 | fxrstor_checking(&init_xstate_buf->i387); | ||
628 | } | ||
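
eager_fpu_init() runs on every CPU coming up: it clears the per-thread math state, forces the X86_FEATURE_EAGER_FPU capability when the boot parameter asked for it, falls back to the lazy model (stts()) when eager FPU is not in effect, lets the boot CPU allocate its FPU buffer exactly once through the __refdata boot_func pointer, and finally loads the init state into the registers with xrstor or fxrstor. A condensed userspace sketch of that control flow follows; every helper here is a placeholder for the kernel primitive named in its comment, not the real implementation.

#include <stdio.h>
#include <stdbool.h>

enum eagerfpu_mode { AUTO, ENABLE, DISABLE };

static bool eager_fpu_forced;       /* setup_force_cpu_cap() stand-in */
static bool boot_buffers_ready;     /* has eager_fpu_init_bp() already run? */

static void eager_fpu_init(enum eagerfpu_mode mode, bool cpu_has_eager_fpu,
                           bool cpu_has_xsave)
{
    /* clear_used_math() and the thread-status reset would happen here. */

    if (mode == ENABLE)
        eager_fpu_forced = true;

    if (!cpu_has_eager_fpu && !eager_fpu_forced) {
        puts("lazy mode: set CR0.TS (stts()) and return");
        return;
    }

    if (!boot_buffers_ready) {
        puts("boot CPU: allocate FPU buffer, build init_xstate_buf");
        boot_buffers_ready = true;  /* boot_func = NULL in the kernel */
    }

    /* math_state_restore() equivalent: load the init state eagerly. */
    puts(cpu_has_xsave ? "xrstor(init_xstate_buf)"
                       : "fxrstor(&init_xstate_buf->i387)");
}

int main(void)
{
    eager_fpu_init(ENABLE, true, true);
    return 0;
}
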