author    Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>    2012-10-19 15:19:19 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>    2012-10-19 15:19:19 -0400
commit    e05dacd71db0a5da7c1a44bcaab2a8a240b9c233 (patch)
tree      31382cf1c7d62c03126448affb2fc86e8c4aaa8b /arch/x86/kernel
parent    3ab0b83bf6a1e834f4b884150d8012990c75d25d (diff)
parent    ddffeb8c4d0331609ef2581d84de4d763607bd37 (diff)
Merge commit 'v3.7-rc1' into stable/for-linus-3.7
* commit 'v3.7-rc1': (10892 commits)
Linux 3.7-rc1
x86, boot: Explicitly include autoconf.h for hostprogs
perf: Fix UAPI fallout
ARM: config: make sure that platforms are ordered by option string
ARM: config: sort select statements alphanumerically
UAPI: (Scripted) Disintegrate include/linux/byteorder
UAPI: (Scripted) Disintegrate include/linux
UAPI: Unexport linux/blk_types.h
UAPI: Unexport part of linux/ppp-comp.h
perf: Handle new rbtree implementation
procfs: don't need a PATH_MAX allocation to hold a string representation of an int
vfs: embed struct filename inside of names_cache allocation if possible
audit: make audit_inode take struct filename
vfs: make path_openat take a struct filename pointer
vfs: turn do_path_lookup into wrapper around struct filename variant
audit: allow audit code to satisfy getname requests from its names_list
vfs: define struct filename and have getname() return it
btrfs: Fix compilation with user namespace support enabled
userns: Fix posix_acl_file_xattr_userns gid conversion
userns: Properly print bluetooth socket uids
...
Diffstat (limited to 'arch/x86/kernel')
61 files changed, 2402 insertions, 1370 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..91ce48f05f9f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y += probe_roms.o
-obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32) += i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-y += syscall_$(BITS).o
 obj-$(CONFIG_X86_64) += vsyscall_64.o
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
-obj-$(CONFIG_KVM_GUEST) += kvm.o
-obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
@@ -100,6 +99,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 obj-$(CONFIG_OF) += devicetree.o
 obj-$(CONFIG_UPROBES) += uprobes.o
 
+obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..e651f7a589ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	acpi_register_lapic(physid, ACPI_MADT_ENABLED);
 
 	/*
-	 * If mp_register_lapic successfully generates a new logical cpu
+	 * If acpi_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
 	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1b8e5a03d942..11676cf65aee 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void)
 
 	header->video_mode = saved_video_mode;
 
+	header->pmode_behavior = 0;
+
 #ifndef CONFIG_64BIT
 	store_gdt((struct desc_ptr *)&header->pmode_gdt);
 
-	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
-		       &header->pmode_efer_high))
-		header->pmode_efer_low = header->pmode_efer_high = 0;
+	if (!rdmsr_safe(MSR_EFER,
+			&header->pmode_efer_low,
+			&header->pmode_efer_high))
+		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
 #endif /* !CONFIG_64BIT */
 
 	header->pmode_cr0 = read_cr0();
-	header->pmode_cr4 = read_cr4_safe();
-	header->pmode_behavior = 0;
+	if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
+		header->pmode_cr4 = read_cr4();
+		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
+	}
 	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
 			&header->pmode_misc_en_low,
 			&header->pmode_misc_en_high))
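Note: instead of zeroing EFER when the save fails, the suspend path now records which registers were actually captured in a header->pmode_behavior bitmask, and the wakeup path is expected to restore only flagged state. A minimal consumer-side sketch; the wakeup code is not part of this hunk, the struct layout and bit indices below are illustrative, and only the flag names come from the patch:

#include <stdint.h>

/* illustrative bit positions -- the real values live in the wakeup header */
#define WAKEUP_BEHAVIOR_RESTORE_EFER	1
#define WAKEUP_BEHAVIOR_RESTORE_CR4	2

struct wakeup_header_sketch {
	uint32_t pmode_behavior;	/* bitmask: which registers were saved */
	uint32_t pmode_efer_low, pmode_efer_high;
	uint32_t pmode_cr4;
};

static void restore_pmode_regs(const struct wakeup_header_sketch *h)
{
	if (h->pmode_behavior & (1 << WAKEUP_BEHAVIOR_RESTORE_EFER)) {
		/* wrmsr(MSR_EFER, h->pmode_efer_low, h->pmode_efer_high); */
	}
	if (h->pmode_behavior & (1 << WAKEUP_BEHAVIOR_RESTORE_CR4)) {
		/* write_cr4(h->pmode_cr4); */
	}
}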
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534baed5..ef5ccca79a6c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@
 
 #define MAX_PATCH_LEN (255-1)
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int smp_alt_once;
-
-static int __init bootonly(char *str)
-{
-	smp_alt_once = 1;
-	return 1;
-}
-__setup("smp-alt-boot", bootonly);
-#else
-#define smp_alt_once 1
-#endif
-
 static int __initdata_or_module debug_alternative;
 
 static int __init debug_alt(char *str)
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
 		/* turn DS segment override prefix into lock prefix */
 		if (*ptr == 0x3e)
 			text_poke(ptr, ((unsigned char []){0xf0}), 1);
-	};
+	}
 	mutex_unlock(&text_mutex);
 }
 
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 {
 	const s32 *poff;
 
-	if (noreplace_smp)
-		return;
-
 	mutex_lock(&text_mutex);
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 		/* turn lock prefix into DS segment override prefix */
 		if (*ptr == 0xf0)
 			text_poke(ptr, ((unsigned char []){0x3E}), 1);
-	};
+	}
 	mutex_unlock(&text_mutex);
 }
 
@@ -359,7 +343,7 @@ struct smp_alt_module {
 };
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_MUTEX(smp_alt);
-static int smp_mode = 1;	/* protected by smp_alt */
+static bool uniproc_patched = false;	/* protected by smp_alt */
 
 void __init_or_module alternatives_smp_module_add(struct module *mod,
 						  char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 {
 	struct smp_alt_module *smp;
 
-	if (noreplace_smp)
-		return;
+	mutex_lock(&smp_alt);
+	if (!uniproc_patched)
+		goto unlock;
 
-	if (smp_alt_once) {
-		if (boot_cpu_has(X86_FEATURE_UP))
-			alternatives_smp_unlock(locks, locks_end,
-						text, text_end);
-		return;
-	}
+	if (num_possible_cpus() == 1)
+		/* Don't bother remembering, we'll never have to undo it. */
+		goto smp_unlock;
 
 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
 	if (NULL == smp)
-		return; /* we'll run the (safe but slow) SMP code then ... */
+		/* we'll run the (safe but slow) SMP code then ... */
+		goto unlock;
 
 	smp->mod	= mod;
 	smp->name	= name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	mutex_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
-	if (boot_cpu_has(X86_FEATURE_UP))
-		alternatives_smp_unlock(smp->locks, smp->locks_end,
-					smp->text, smp->text_end);
+smp_unlock:
+	alternatives_smp_unlock(locks, locks_end, text, text_end);
+unlock:
 	mutex_unlock(&smp_alt);
 }
 
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 {
 	struct smp_alt_module *item;
 
-	if (smp_alt_once || noreplace_smp)
-		return;
-
 	mutex_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		mutex_unlock(&smp_alt);
-		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
-		return;
+		break;
 	}
 	mutex_unlock(&smp_alt);
 }
 
-bool skip_smp_alternatives;
-void alternatives_smp_switch(int smp)
+void alternatives_enable_smp(void)
 {
 	struct smp_alt_module *mod;
 
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
 	pr_info("lockdep: fixing up alternatives\n");
 #endif
 
-	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
-		return;
-	BUG_ON(!smp && (num_online_cpus() > 1));
+	/* Why bother if there are no other CPUs? */
+	BUG_ON(num_possible_cpus() == 1);
 
 	mutex_lock(&smp_alt);
 
-	/*
-	 * Avoid unnecessary switches because it forces JIT based VMs to
-	 * throw away all cached translations, which can be quite costly.
-	 */
-	if (smp == smp_mode) {
-		/* nothing */
-	} else if (smp) {
+	if (uniproc_patched) {
 		pr_info("switching to SMP code\n");
+		BUG_ON(num_online_cpus() != 1);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
-	} else {
-		pr_info("switching to UP code\n");
-		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-		list_for_each_entry(mod, &smp_alt_modules, next)
-			alternatives_smp_unlock(mod->locks, mod->locks_end,
-						mod->text, mod->text_end);
+		uniproc_patched = false;
 	}
-	smp_mode = smp;
 	mutex_unlock(&smp_alt);
 }
 
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
 
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
-	/* switch to patch-once-at-boottime-only mode and free the
-	 * tables in case we know the number of CPUs will never ever
-	 * change */
-#ifdef CONFIG_HOTPLUG_CPU
-	if (num_possible_cpus() < 2)
-		smp_alt_once = 1;
-#endif
-
 #ifdef CONFIG_SMP
-	if (smp_alt_once) {
-		if (1 == num_possible_cpus()) {
-			pr_info("switching to UP code\n");
-			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-
-			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
-						_text, _etext);
-		}
-	} else {
+	/* Patch to UP if other cpus not imminent. */
+	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
+		uniproc_patched = true;
 		alternatives_smp_module_add(NULL, "core kernel",
 					    __smp_locks, __smp_locks_end,
 					    _text, _etext);
-
-		/* Only switch to UP mode if we don't immediately boot others */
-		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
-			alternatives_smp_switch(0);
 	}
-#endif
-	apply_paravirt(__parainstructions, __parainstructions_end);
 
-	if (smp_alt_once)
+	if (!uniproc_patched || num_possible_cpus() == 1)
 		free_init_pages("SMP alternatives",
 				(unsigned long)__smp_locks,
 				(unsigned long)__smp_locks_end);
+#endif
+
+	apply_paravirt(__parainstructions, __parainstructions_end);
 
 	restart_nmi();
 }
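Note: this rework drops the smp-alt-boot/smp_alt_once machinery and the SMP-to-UP direction entirely; the kernel now boots with UP-patched lock sites and alternatives_enable_smp() flips them one way, to SMP, when a second CPU is brought up. The byte-level mechanism is unchanged: each recorded lock site holds either a LOCK prefix (0xf0) or a same-length, harmless DS segment override (0x3e). A standalone sketch of that idea, assuming an ordinary writable buffer; the kernel itself goes through text_poke() and its __smp_locks offset table:

#include <stdint.h>
#include <stddef.h>

/* Flip recorded LOCK-prefix sites between SMP (0xf0) and UP (0x3e).
 * poff[] holds offsets of the prefix bytes relative to text, like the
 * kernel's __smp_locks section of s32 offsets. */
static void patch_smp_locks(uint8_t *text, const int32_t *poff, size_t n,
			    int enable_smp)
{
	for (size_t i = 0; i < n; i++) {
		uint8_t *ptr = text + poff[i];
		if (enable_smp && *ptr == 0x3e)
			*ptr = 0xf0;	/* DS override -> LOCK prefix */
		else if (!enable_smp && *ptr == 0xf0)
			*ptr = 0x3e;	/* LOCK prefix -> no-op DS override */
	}
}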
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb3082328..b17416e72fbd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
 		i++;
 		v1 >>= 1;
-	};
+	}
 
 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index bc552cff2578..a65829ac2b9a 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@
 
 static int numachip_system __read_mostly;
 
-static struct apic apic_numachip __read_mostly;
+static const struct apic apic_numachip __read_mostly;
 
 static unsigned int get_apic_id(unsigned long x)
 {
@@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static struct apic apic_numachip __refconst = {
+static const struct apic apic_numachip __refconst = {
 
 	.name				= "NumaConnect system",
 	.probe				= numachip_probe,
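Note: both the forward declaration and the initialized definition of apic_numachip gain const, so the two declarations agree and the object can be placed in read-only data. The diff relies on the C rule that a file-scope tentative declaration may be followed by the real definition; reduced to a sketch with hypothetical names:

struct ops {
	int (*probe)(void);
};

/* tentative declaration: lets earlier code take its address */
static const struct ops my_ops;

static const struct ops *pick(void)
{
	return &my_ops;
}

/* the real definition; both declarations must agree on const */
static const struct ops my_ops = {
	.probe = 0,
};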
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc962ec..28610822fb3c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@ void common(void) {
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
 	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+	BLANK();
+	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 }
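Note: asm-offsets.c is never linked; it is compiled to assembly and the DEFINE()/OFFSET()/BLANK() markers are scraped into the generated asm-offsets.h, which is how the new PTREGS_SIZE constant becomes usable from assembly code. The macros follow the kernel's include/linux/kbuild.h pattern, roughly:

#include <stddef.h>		/* offsetof */

/* Emit a "->SYM value" marker into the generated assembly; kbuild's
 * sed script turns each marker into a #define in asm-offsets.h. */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define BLANK() \
	asm volatile("\n->" : : )
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))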
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d30a6a9a0121..a0e067d3d96c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
 endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19039f0..f7e98a2c0d12 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif
 
+static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg)
+		return;
+
+	tlb_flushall_shift = 5;
+
+	if (c->x86 <= 0x11)
+		tlb_flushall_shift = 4;
+}
+
+static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+{
+	u32 ebx, eax, ecx, edx;
+	u16 mask = 0xfff;
+
+	if (c->x86 < 0xf)
+		return;
+
+	if (c->extended_cpuid_level < 0x80000006)
+		return;
+
+	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+	tlb_lli_4k[ENTRIES] = ebx & mask;
+
+	/*
+	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
+	 * characteristics from the CPUID function 0x80000005 instead.
+	 */
+	if (c->x86 == 0xf) {
+		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+		mask = 0xff;
+	}
+
+	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!((eax >> 16) & mask)) {
+		u32 a, b, c, d;
+
+		cpuid(0x80000005, &a, &b, &c, &d);
+		tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
+	} else {
+		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+	}
+
+	/* a 4M entry uses two 2M entries */
+	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!(eax & mask)) {
+		/* Erratum 658 */
+		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
+			tlb_lli_2m[ENTRIES] = 1024;
+		} else {
+			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+			tlb_lli_2m[ENTRIES] = eax & 0xff;
+		}
+	} else
+		tlb_lli_2m[ENTRIES] = eax & mask;
+
+	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+	cpu_set_tlb_flushall_shift(c);
+}
+
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_vendor	= "AMD",
 	.c_ident	= { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_size_cache	= amd_size_cache,
 #endif
 	.c_early_init	= early_init_amd,
+	.c_detect_tlb	= cpu_detect_tlb_amd,
 	.c_bsp_init	= bsp_init_amd,
 	.c_init		= init_amd,
 	.c_x86_vendor	= X86_VENDOR_AMD,
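Note: cpu_detect_tlb_amd() decodes the L2 TLB geometry from CPUID leaf 0x80000006 (12-bit entry counts, with the K8 fallback to the 8-bit L1 fields of leaf 0x80000005). A hedged userspace sketch of the same EBX decode, assuming GCC/Clang's <cpuid.h>:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000006, &eax, &ebx, &ecx, &edx))
		return 1;

	/* EBX describes 4K pages: data TLB entry count in bits [27:16],
	 * instruction TLB entry count in bits [11:0] -- the same masks the
	 * patch applies. */
	printf("L2 dTLB 4K entries: %u\n", (ebx >> 16) & 0xfff);
	printf("L2 iTLB 4K entries: %u\n", ebx & 0xfff);
	/* EAX has the same layout for 2M/4M pages. */
	printf("L2 dTLB 2M/4M entries: %u\n", (eax >> 16) & 0xfff);
	return 0;
}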
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..7505f7b13e71 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
-static int disable_smep __cpuinitdata;
 static __init int setup_disable_smep(char *arg)
 {
-	disable_smep = 1;
+	setup_clear_cpu_cap(X86_FEATURE_SMEP);
 	return 1;
 }
 __setup("nosmep", setup_disable_smep);
 
-static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
+static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP)) {
-		if (unlikely(disable_smep)) {
-			setup_clear_cpu_cap(X86_FEATURE_SMEP);
-			clear_in_cr4(X86_CR4_SMEP);
-		} else
-			set_in_cr4(X86_CR4_SMEP);
-	}
+	if (cpu_has(c, X86_FEATURE_SMEP))
+		set_in_cr4(X86_CR4_SMEP);
+}
+
+static __init int setup_disable_smap(char *arg)
+{
+	setup_clear_cpu_cap(X86_FEATURE_SMAP);
+	return 1;
+}
+__setup("nosmap", setup_disable_smap);
+
+static __always_inline void setup_smap(struct cpuinfo_x86 *c)
+{
+	unsigned long eflags;
+
+	/* This should have been cleared long ago */
+	raw_local_save_flags(eflags);
+	BUG_ON(eflags & X86_EFLAGS_AC);
+
+	if (cpu_has(c, X86_FEATURE_SMAP))
+		set_in_cr4(X86_CR4_SMAP);
 }
 
 /*
@@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
 
 	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
 		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
-		"tlb_flushall_shift is 0x%x\n",
+		"tlb_flushall_shift: %d\n",
 		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
 		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
 		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	c->cpu_index = 0;
 	filter_cpuid_features(c, false);
 
-	setup_smep(c);
-
 	if (this_cpu->c_bsp_init)
 		this_cpu->c_bsp_init(c);
 }
@@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 		c->phys_proc_id = c->initial_apicid;
 	}
 
-	setup_smep(c);
-
 	get_model_name(c); /* Default name */
 
 	detect_nopl(c);
@@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
+	/* Set up SMEP/SMAP */
+	setup_smep(c);
+	setup_smap(c);
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	if (boot_cpu_data.cpuid_level >= 2)
-		cpu_detect_tlb(&boot_cpu_data);
+	cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk(KERN_CONT "%s ", vendor);
 
 	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
+		printk(KERN_CONT "%s", strim(c->x86_model_id));
 	else
 		printk(KERN_CONT "%d86", c->x86);
 
+	printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+
 	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+		printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
 	else
-		printk(KERN_CONT "\n");
+		printk(KERN_CONT ")\n");
 
 	print_cpu_msr(c);
 }
@@ -1113,11 +1127,10 @@ void syscall_init(void)
 
 	/* Flags to clear on syscall */
 	wrmsrl(MSR_SYSCALL_MASK,
-	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
+	       X86_EFLAGS_IOPL|X86_EFLAGS_AC);
 }
 
-unsigned long kernel_eflags;
-
 /*
  * Copies of the original ist values from the tss are only accessed during
  * debugging, no special alignment required.
@@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
-
-	raw_local_save_flags(kernel_eflags);
 
 	if (is_uv_system())
 		uv_cpu_init();
@@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 }
 #endif
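Note: setup_smap() insists that EFLAGS.AC is clear before turning SMAP on, because once CR4.SMAP is set, AC becomes the override that lets kernel code touch user pages (toggled via stac/clac); the syscall mask change above likewise clears AC on kernel entry. A freestanding sketch of that check (X86_EFLAGS_AC is bit 18 of EFLAGS):

#include <stdint.h>

#define X86_EFLAGS_AC	(1UL << 18)

static inline unsigned long read_eflags(void)
{
	unsigned long flags;

	asm volatile("pushf\n\tpop %0" : "=r" (flags));
	return flags;
}

/* Mirrors the BUG_ON() above: AC must not be set this early in boot. */
static int ac_is_clear(void)
{
	return !(read_eflags() & X86_EFLAGS_AC);
}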
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce2980a5a..198e019a531a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
 	int i, j, n;
 	unsigned int regs[4];
 	unsigned char *desc = (unsigned char *)regs;
+
+	if (c->cpuid_level < 2)
+		return;
+
 	/* Number of times to iterate */
 	n = cpuid_eax(2) & 0xFF;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
 		put_online_cpus();
 	} else
 #endif
+	{
+		preempt_disable();
 		raise_local();
+		preempt_enable();
+	}
 }
 
 /* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	 * so do it a jiffie or two later everywhere.
 	 */
 	schedule_timeout(2);
+
+	mutex_lock(&mce_inject_mutex);
 	raise_mce(&m);
+	mutex_unlock(&mce_inject_mutex);
 	return usize;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
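Note: the header uses the usual stub pattern -- real prototypes under CONFIG_X86_MCE_INTEL, empty static inlines otherwise -- so the generic mce.c code needs no #ifdefs at its call sites and the compiler discards the calls entirely in non-Intel builds. The same pattern, reduced to its shape with hypothetical CONFIG_FOO/foo_poll names:

/* One header serves both configurations. */
#ifdef CONFIG_FOO
void foo_poll(void);			/* implemented in foo.c */
#else
static inline void foo_poll(void) { }	/* no-op, inlined away */
#endif

void common_code(void)
{
	foo_poll();	/* no #ifdef needed at any call site */
}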
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
 int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;
 
 struct mce_bank *mce_banks __read_mostly;
 
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+	return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+	mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
 	if (mce_available(__this_cpu_ptr(&cpu_info))) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
+		mce_intel_cmci_poll();
 	}
 
 	/*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
 	 * polling interval, otherwise increase the polling interval.
 	 */
 	iv = __this_cpu_read(mce_next_interval);
-	if (mce_notify_irq())
+	if (mce_notify_irq()) {
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	else
+	} else {
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+		iv = mce_adjust_timer(iv);
+	}
 	__this_cpu_write(mce_next_interval, iv);
+	/* Might have become 0 after CMCI storm subsided */
+	if (iv) {
+		t->expires = jiffies + iv;
+		add_timer_on(t, smp_processor_id());
+	}
+}
 
-	t->expires = jiffies + iv;
-	add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long when = jiffies + interval;
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+	if (interval < iv)
+		__this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
+		mce_adjust_timer = mce_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = check_interval * HZ;
+	unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
-	setup_timer(t, mce_timer_fn, smp_processor_id());
+	__this_cpu_write(mce_next_interval, iv);
 
-	if (mce_ignore_ce)
+	if (mce_ignore_ce || !iv)
 		return;
 
-	__this_cpu_write(mce_next_interval, iv);
-	if (!iv)
-		return;
 	t->expires = round_jiffies(jiffies + iv);
 	add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned int cpu = smp_processor_id();
+
+	setup_timer(t, mce_timer_fn, cpu);
+	mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
 		mce_ignore_ce = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
+	else if (!strcmp(str, "bios_cmci_threshold"))
+		mce_bios_cmci_threshold = 1;
 	else if (isdigit(str[0])) {
 		get_option(&str, &tolerant);
 		if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
 	&mce_cmci_disabled
 };
 
+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+	&mce_bios_cmci_threshold
+};
+
 static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_tolerant.attr,
 	&dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_dont_log_ce.attr,
 	&dev_attr_ignore_ce.attr,
 	&dev_attr_cmci_disabled.attr,
+	&dev_attr_bios_cmci_threshold.attr,
 	NULL
 };
 
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	unsigned int cpu = (unsigned long)hcpu;
 	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		mce_device_create(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		break;
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		mce_device_remove(cpu);
+		mce_intel_hcpu_update(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		del_timer_sync(t);
 		break;
 	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					per_cpu(mce_next_interval, cpu));
-			add_timer_on(t, cpu);
-		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mce_start_timer(cpu, t);
 		break;
-	case CPU_POST_DEAD:
+	}
+
+	if (action == CPU_POST_DEAD) {
 		/* intentionally ignoring frozen here */
 		cmci_rediscover(cpu);
-		break;
 	}
+
 	return NOTIFY_OK;
 }
 
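Note: mce_timer_fn() keeps its adaptive backoff (halve the interval when mce_notify_irq() saw work, double it otherwise, capped at check_interval), but the quiet path now runs through the mce_adjust_timer hook so the Intel CMCI code can pin the interval during a storm, and an interval of 0 is tolerated once a storm subsides. A standalone sketch of the backoff arithmetic, with HZ and check_interval stood in by constants:

#include <stdbool.h>

#define HZ		1000UL
#define CHECK_INTERVAL	(5UL * 60UL)	/* seconds, as in mce.c */

static unsigned long next_interval(unsigned long iv, bool saw_event)
{
	if (saw_event) {
		/* events seen: poll faster, but no faster than HZ/100 */
		iv /= 2;
		return iv > HZ / 100 ? iv : HZ / 100;
	}
	/* quiet: back off, but never beyond the slow polling interval */
	iv *= 2;
	return iv < CHECK_INTERVAL * HZ ? iv : CHECK_INTERVAL * HZ;
}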
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 38e49bc95ffc..5f88abf07e9c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
16 | #include <asm/mce.h> | 16 | #include <asm/mce.h> |
17 | 17 | ||
18 | #include "mce-internal.h" | ||
19 | |||
18 | /* | 20 | /* |
19 | * Support for Intel Correct Machine Check Interrupts. This allows | 21 | * Support for Intel Correct Machine Check Interrupts. This allows |
20 | * the CPU to raise an interrupt when a corrected machine check happened. | 22 | * the CPU to raise an interrupt when a corrected machine check happened. |
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | |||
30 | */ | 32 | */ |
31 | static DEFINE_RAW_SPINLOCK(cmci_discover_lock); | 33 | static DEFINE_RAW_SPINLOCK(cmci_discover_lock); |
32 | 34 | ||
33 | #define CMCI_THRESHOLD 1 | 35 | #define CMCI_THRESHOLD 1 |
36 | #define CMCI_POLL_INTERVAL (30 * HZ) | ||
37 | #define CMCI_STORM_INTERVAL (1 * HZ) | ||
38 | #define CMCI_STORM_THRESHOLD 15 | ||
39 | |||
40 | static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); | ||
41 | static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); | ||
42 | static DEFINE_PER_CPU(unsigned int, cmci_storm_state); | ||
43 | |||
44 | enum { | ||
45 | CMCI_STORM_NONE, | ||
46 | CMCI_STORM_ACTIVE, | ||
47 | CMCI_STORM_SUBSIDED, | ||
48 | }; | ||
49 | |||
50 | static atomic_t cmci_storm_on_cpus; | ||
34 | 51 | ||
35 | static int cmci_supported(int *banks) | 52 | static int cmci_supported(int *banks) |
36 | { | 53 | { |
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks) | |||
53 | return !!(cap & MCG_CMCI_P); | 70 | return !!(cap & MCG_CMCI_P); |
54 | } | 71 | } |
55 | 72 | ||
73 | void mce_intel_cmci_poll(void) | ||
74 | { | ||
75 | if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) | ||
76 | return; | ||
77 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
78 | } | ||
79 | |||
80 | void mce_intel_hcpu_update(unsigned long cpu) | ||
81 | { | ||
82 | if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) | ||
83 | atomic_dec(&cmci_storm_on_cpus); | ||
84 | |||
85 | per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; | ||
86 | } | ||
87 | |||
88 | unsigned long mce_intel_adjust_timer(unsigned long interval) | ||
89 | { | ||
90 | int r; | ||
91 | |||
92 | if (interval < CMCI_POLL_INTERVAL) | ||
93 | return interval; | ||
94 | |||
95 | switch (__this_cpu_read(cmci_storm_state)) { | ||
96 | case CMCI_STORM_ACTIVE: | ||
97 | /* | ||
98 | * We switch back to interrupt mode once the poll timer has | ||
99 | * silenced itself. That means no events recorded and the | ||
100 | * timer interval is back to our poll interval. | ||
101 | */ | ||
102 | __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); | ||
103 | r = atomic_sub_return(1, &cmci_storm_on_cpus); | ||
104 | if (r == 0) | ||
105 | pr_notice("CMCI storm subsided: switching to interrupt mode\n"); | ||
106 | /* FALLTHROUGH */ | ||
107 | |||
108 | case CMCI_STORM_SUBSIDED: | ||
109 | /* | ||
110 | * We wait for all cpus to go back to SUBSIDED | ||
111 | * state. When that happens we switch back to | ||
112 | * interrupt mode. | ||
113 | */ | ||
114 | if (!atomic_read(&cmci_storm_on_cpus)) { | ||
115 | __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); | ||
116 | cmci_reenable(); | ||
117 | cmci_recheck(); | ||
118 | } | ||
119 | return CMCI_POLL_INTERVAL; | ||
120 | default: | ||
121 | /* | ||
122 | * We have shiny weather. Let the poll do whatever it | ||
123 | * thinks. | ||
124 | */ | ||
125 | return interval; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static bool cmci_storm_detect(void) | ||
130 | { | ||
131 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); | ||
132 | unsigned long ts = __this_cpu_read(cmci_time_stamp); | ||
133 | unsigned long now = jiffies; | ||
134 | int r; | ||
135 | |||
136 | if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) | ||
137 | return true; | ||
138 | |||
139 | if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { | ||
140 | cnt++; | ||
141 | } else { | ||
142 | cnt = 1; | ||
143 | __this_cpu_write(cmci_time_stamp, now); | ||
144 | } | ||
145 | __this_cpu_write(cmci_storm_cnt, cnt); | ||
146 | |||
147 | if (cnt <= CMCI_STORM_THRESHOLD) | ||
148 | return false; | ||
149 | |||
150 | cmci_clear(); | ||
151 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); | ||
152 | r = atomic_add_return(1, &cmci_storm_on_cpus); | ||
153 | mce_timer_kick(CMCI_POLL_INTERVAL); | ||
154 | |||
155 | if (r == 1) | ||
156 | pr_notice("CMCI storm detected: switching to poll mode\n"); | ||
157 | return true; | ||
158 | } | ||
159 | |||
56 | /* | 160 | /* |
57 | * The interrupt handler. This is called on every event. | 161 | * The interrupt handler. This is called on every event. |
58 | * Just call the poller directly to log any events. | 162 | * Just call the poller directly to log any events. |
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks) | |||
61 | */ | 165 | */ |
62 | static void intel_threshold_interrupt(void) | 166 | static void intel_threshold_interrupt(void) |
63 | { | 167 | { |
168 | if (cmci_storm_detect()) | ||
169 | return; | ||
64 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | 170 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
65 | mce_notify_irq(); | 171 | mce_notify_irq(); |
66 | } | 172 | } |
67 | 173 | ||
68 | static void print_update(char *type, int *hdr, int num) | ||
69 | { | ||
70 | if (*hdr == 0) | ||
71 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
72 | *hdr = 1; | ||
73 | printk(KERN_CONT " %s:%d", type, num); | ||
74 | } | ||
75 | |||
76 | /* | 174 | /* |
77 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | 175 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks |
78 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | 176 | * on this CPU. Use the algorithm recommended in the SDM to discover shared |
79 | * banks. | 177 | * banks. |
80 | */ | 178 | */ |
81 | static void cmci_discover(int banks, int boot) | 179 | static void cmci_discover(int banks) |
82 | { | 180 | { |
83 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | 181 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
84 | unsigned long flags; | 182 | unsigned long flags; |
85 | int hdr = 0; | ||
86 | int i; | 183 | int i; |
184 | int bios_wrong_thresh = 0; | ||
87 | 185 | ||
88 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | 186 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
89 | for (i = 0; i < banks; i++) { | 187 | for (i = 0; i < banks; i++) { |
90 | u64 val; | 188 | u64 val; |
189 | int bios_zero_thresh = 0; | ||
91 | 190 | ||
92 | if (test_bit(i, owned)) | 191 | if (test_bit(i, owned)) |
93 | continue; | 192 | continue; |
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot) | |||
96 | 195 | ||
97 | /* Already owned by someone else? */ | 196 | /* Already owned by someone else? */ |
98 | if (val & MCI_CTL2_CMCI_EN) { | 197 | if (val & MCI_CTL2_CMCI_EN) { |
99 | if (test_and_clear_bit(i, owned) && !boot) | 198 | clear_bit(i, owned); |
100 | print_update("SHD", &hdr, i); | ||
101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 199 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
102 | continue; | 200 | continue; |
103 | } | 201 | } |
104 | 202 | ||
105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; | 203 | if (!mce_bios_cmci_threshold) { |
106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | 204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
205 | val |= CMCI_THRESHOLD; | ||
206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { | ||
207 | /* | ||
208 | * If bios_cmci_threshold boot option was specified | ||
209 | * but the threshold is zero, we'll try to initialize | ||
210 | * it to 1. | ||
211 | */ | ||
212 | bios_zero_thresh = 1; | ||
213 | val |= CMCI_THRESHOLD; | ||
214 | } | ||
215 | |||
216 | val |= MCI_CTL2_CMCI_EN; | ||
107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 217 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 218 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
109 | 219 | ||
110 | /* Did the enable bit stick? -- the bank supports CMCI */ | 220 | /* Did the enable bit stick? -- the bank supports CMCI */ |
111 | if (val & MCI_CTL2_CMCI_EN) { | 221 | if (val & MCI_CTL2_CMCI_EN) { |
112 | if (!test_and_set_bit(i, owned) && !boot) | 222 | set_bit(i, owned); |
113 | print_update("CMCI", &hdr, i); | ||
114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 223 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
224 | /* | ||
225 | * We are able to set thresholds for some banks that | ||
226 | * had a threshold of 0. This means the BIOS has not | ||
227 | * set the thresholds properly or does not work with | ||
228 | * this boot option. Note down now and report later. | ||
229 | */ | ||
230 | if (mce_bios_cmci_threshold && bios_zero_thresh && | ||
231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) | ||
232 | bios_wrong_thresh = 1; | ||
115 | } else { | 233 | } else { |
116 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | 234 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
117 | } | 235 | } |
118 | } | 236 | } |
119 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
120 | if (hdr) | 238 | if (mce_bios_cmci_threshold && bios_wrong_thresh) { |
121 | printk(KERN_CONT "\n"); | 239 | pr_info_once( |
240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); | ||
241 | pr_info_once( | ||
242 | "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); | ||
243 | } | ||
122 | } | 244 | } |
123 | 245 | ||
124 | /* | 246 | /* |
@@ -156,7 +278,7 @@ void cmci_clear(void) | |||
156 | continue; | 278 | continue; |
157 | /* Disable CMCI */ | 279 | /* Disable CMCI */ |
158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 280 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); | 281 | val &= ~MCI_CTL2_CMCI_EN; |
160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 282 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 283 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
162 | } | 284 | } |
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying) | |||
186 | continue; | 308 | continue; |
187 | /* Recheck banks in case CPUs don't all have the same */ | 309 | /* Recheck banks in case CPUs don't all have the same */ |
188 | if (cmci_supported(&banks)) | 310 | if (cmci_supported(&banks)) |
189 | cmci_discover(banks, 0); | 311 | cmci_discover(banks); |
190 | } | 312 | } |
191 | 313 | ||
192 | set_cpus_allowed_ptr(current, old); | 314 | set_cpus_allowed_ptr(current, old); |
@@ -200,7 +322,7 @@ void cmci_reenable(void) | |||
200 | { | 322 | { |
201 | int banks; | 323 | int banks; |
202 | if (cmci_supported(&banks)) | 324 | if (cmci_supported(&banks)) |
203 | cmci_discover(banks, 0); | 325 | cmci_discover(banks); |
204 | } | 326 | } |
205 | 327 | ||
206 | static void intel_init_cmci(void) | 328 | static void intel_init_cmci(void) |
@@ -211,7 +333,7 @@ static void intel_init_cmci(void) | |||
211 | return; | 333 | return; |
212 | 334 | ||
213 | mce_threshold_vector = intel_threshold_interrupt; | 335 | mce_threshold_vector = intel_threshold_interrupt; |
214 | cmci_discover(banks, 1); | 336 | cmci_discover(banks); |
215 | /* | 337 | /* |
216 | * For CPU #0 this runs with still disabled APIC, but that's | 338 | * For CPU #0 this runs with still disabled APIC, but that's |
217 | * ok because only the vector is set up. We still do another | 339 | * ok because only the vector is set up. We still do another |
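
The mce_intel.c hunks above drop the boot-time "CMCI" banner (cmci_discover() loses its boot flag) and add the bios_cmci_threshold fallback. As a standalone sketch of the per-bank decision, with the locking, ownership bitmaps and the wrmsrl()/rdmsrl() round trip stripped out (constant values copied from this kernel's asm/mce.h; the function name is made up for illustration):

#include <stdint.h>

#define MCI_CTL2_CMCI_EN                (1ULL << 30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK    0x7fffULL
#define CMCI_THRESHOLD                  1       /* interrupt per error */

/*
 * Returns nonzero when the BIOS claimed threshold support but left the
 * threshold at 0 and our substitute stuck, i.e. the condition the code
 * above records in bios_wrong_thresh and reports via pr_info_once().
 */
int cmci_enable_bank(uint64_t *ctl2, int bios_cmci_threshold)
{
        int bios_zero_thresh = 0;

        if (bios_cmci_threshold && !(*ctl2 & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                bios_zero_thresh = 1;           /* seed threshold with 1 */
                *ctl2 |= CMCI_THRESHOLD;
        }
        *ctl2 |= MCI_CTL2_CMCI_EN;
        /* the real code writes the MSR here and reads it back */
        if (!(*ctl2 & MCI_CTL2_CMCI_EN))
                return 0;                       /* bank has no CMCI */
        return bios_zero_thresh && (*ctl2 & MCI_CTL2_CMCI_THRESHOLD_MASK);
}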
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl index c7b3fe2d72e0..091972ef49de 100644 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ b/arch/x86/kernel/cpu/mkcapflags.pl | |||
@@ -8,7 +8,10 @@ | |||
8 | open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; | 8 | open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; |
9 | open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; | 9 | open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; |
10 | 10 | ||
11 | print OUT "#include <asm/cpufeature.h>\n\n"; | 11 | print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n"; |
12 | print OUT "#include <asm/cpufeature.h>\n"; | ||
13 | print OUT "#endif\n"; | ||
14 | print OUT "\n"; | ||
12 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; | 15 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; |
13 | 16 | ||
14 | %features = (); | 17 | %features = (); |
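
The effect of the mkcapflags.pl change is easiest to see in the generated capflags.c: the include is now wrapped in the header's own guard, so a build that pre-defines _ASM_X86_CPUFEATURE_H (or has already pulled the header in) can consume the table without re-including it. The emitted preamble becomes:

#ifndef _ASM_X86_CPUFEATURE_H
#include <asm/cpufeature.h>
#endif

const char * const x86_cap_flags[NCAPINTS*32] = {
        /* one string entry per known feature bit follows */
};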
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6605a81ba339..271d25700297 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[]; | |||
586 | 586 | ||
587 | extern struct event_constraint intel_snb_pebs_event_constraints[]; | 587 | extern struct event_constraint intel_snb_pebs_event_constraints[]; |
588 | 588 | ||
589 | extern struct event_constraint intel_ivb_pebs_event_constraints[]; | ||
590 | |||
589 | struct event_constraint *intel_pebs_constraints(struct perf_event *event); | 591 | struct event_constraint *intel_pebs_constraints(struct perf_event *event); |
590 | 592 | ||
591 | void intel_pmu_pebs_enable(struct perf_event *event); | 593 | void intel_pmu_pebs_enable(struct perf_event *event); |
@@ -624,6 +626,8 @@ int p4_pmu_init(void); | |||
624 | 626 | ||
625 | int p6_pmu_init(void); | 627 | int p6_pmu_init(void); |
626 | 628 | ||
629 | int knc_pmu_init(void); | ||
630 | |||
627 | #else /* CONFIG_CPU_SUP_INTEL */ | 631 | #else /* CONFIG_CPU_SUP_INTEL */ |
628 | 632 | ||
629 | static inline void reserve_ds_buffers(void) | 633 | static inline void reserve_ds_buffers(void) |
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 7bfb5bec8630..6336bcbd0618 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -41,17 +41,22 @@ struct cpu_perf_ibs { | |||
41 | }; | 41 | }; |
42 | 42 | ||
43 | struct perf_ibs { | 43 | struct perf_ibs { |
44 | struct pmu pmu; | 44 | struct pmu pmu; |
45 | unsigned int msr; | 45 | unsigned int msr; |
46 | u64 config_mask; | 46 | u64 config_mask; |
47 | u64 cnt_mask; | 47 | u64 cnt_mask; |
48 | u64 enable_mask; | 48 | u64 enable_mask; |
49 | u64 valid_mask; | 49 | u64 valid_mask; |
50 | u64 max_period; | 50 | u64 max_period; |
51 | unsigned long offset_mask[1]; | 51 | unsigned long offset_mask[1]; |
52 | int offset_max; | 52 | int offset_max; |
53 | struct cpu_perf_ibs __percpu *pcpu; | 53 | struct cpu_perf_ibs __percpu *pcpu; |
54 | u64 (*get_count)(u64 config); | 54 | |
55 | struct attribute **format_attrs; | ||
56 | struct attribute_group format_group; | ||
57 | const struct attribute_group *attr_groups[2]; | ||
58 | |||
59 | u64 (*get_count)(u64 config); | ||
55 | }; | 60 | }; |
56 | 61 | ||
57 | struct perf_ibs_data { | 62 | struct perf_ibs_data { |
@@ -209,6 +214,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) | |||
209 | return -EOPNOTSUPP; | 214 | return -EOPNOTSUPP; |
210 | } | 215 | } |
211 | 216 | ||
217 | static const struct perf_event_attr ibs_notsupp = { | ||
218 | .exclude_user = 1, | ||
219 | .exclude_kernel = 1, | ||
220 | .exclude_hv = 1, | ||
221 | .exclude_idle = 1, | ||
222 | .exclude_host = 1, | ||
223 | .exclude_guest = 1, | ||
224 | }; | ||
225 | |||
212 | static int perf_ibs_init(struct perf_event *event) | 226 | static int perf_ibs_init(struct perf_event *event) |
213 | { | 227 | { |
214 | struct hw_perf_event *hwc = &event->hw; | 228 | struct hw_perf_event *hwc = &event->hw; |
@@ -229,6 +243,9 @@ static int perf_ibs_init(struct perf_event *event) | |||
229 | if (event->pmu != &perf_ibs->pmu) | 243 | if (event->pmu != &perf_ibs->pmu) |
230 | return -ENOENT; | 244 | return -ENOENT; |
231 | 245 | ||
246 | if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) | ||
247 | return -EINVAL; | ||
248 | |||
232 | if (config & ~perf_ibs->config_mask) | 249 | if (config & ~perf_ibs->config_mask) |
233 | return -EINVAL; | 250 | return -EINVAL; |
234 | 251 | ||
@@ -434,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags) | |||
434 | 451 | ||
435 | static void perf_ibs_read(struct perf_event *event) { } | 452 | static void perf_ibs_read(struct perf_event *event) { } |
436 | 453 | ||
454 | PMU_FORMAT_ATTR(rand_en, "config:57"); | ||
455 | PMU_FORMAT_ATTR(cnt_ctl, "config:19"); | ||
456 | |||
457 | static struct attribute *ibs_fetch_format_attrs[] = { | ||
458 | &format_attr_rand_en.attr, | ||
459 | NULL, | ||
460 | }; | ||
461 | |||
462 | static struct attribute *ibs_op_format_attrs[] = { | ||
463 | NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */ | ||
464 | NULL, | ||
465 | }; | ||
466 | |||
437 | static struct perf_ibs perf_ibs_fetch = { | 467 | static struct perf_ibs perf_ibs_fetch = { |
438 | .pmu = { | 468 | .pmu = { |
439 | .task_ctx_nr = perf_invalid_context, | 469 | .task_ctx_nr = perf_invalid_context, |
@@ -453,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = { | |||
453 | .max_period = IBS_FETCH_MAX_CNT << 4, | 483 | .max_period = IBS_FETCH_MAX_CNT << 4, |
454 | .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, | 484 | .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, |
455 | .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, | 485 | .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, |
486 | .format_attrs = ibs_fetch_format_attrs, | ||
456 | 487 | ||
457 | .get_count = get_ibs_fetch_count, | 488 | .get_count = get_ibs_fetch_count, |
458 | }; | 489 | }; |
@@ -476,6 +507,7 @@ static struct perf_ibs perf_ibs_op = { | |||
476 | .max_period = IBS_OP_MAX_CNT << 4, | 507 | .max_period = IBS_OP_MAX_CNT << 4, |
477 | .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, | 508 | .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, |
478 | .offset_max = MSR_AMD64_IBSOP_REG_COUNT, | 509 | .offset_max = MSR_AMD64_IBSOP_REG_COUNT, |
510 | .format_attrs = ibs_op_format_attrs, | ||
479 | 511 | ||
480 | .get_count = get_ibs_op_count, | 512 | .get_count = get_ibs_op_count, |
481 | }; | 513 | }; |
@@ -585,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) | |||
585 | 617 | ||
586 | perf_ibs->pcpu = pcpu; | 618 | perf_ibs->pcpu = pcpu; |
587 | 619 | ||
620 | /* register attributes */ | ||
621 | if (perf_ibs->format_attrs[0]) { | ||
622 | memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group)); | ||
623 | perf_ibs->format_group.name = "format"; | ||
624 | perf_ibs->format_group.attrs = perf_ibs->format_attrs; | ||
625 | |||
626 | memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups)); | ||
627 | perf_ibs->attr_groups[0] = &perf_ibs->format_group; | ||
628 | perf_ibs->pmu.attr_groups = perf_ibs->attr_groups; | ||
629 | } | ||
630 | |||
588 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); | 631 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); |
589 | if (ret) { | 632 | if (ret) { |
590 | perf_ibs->pcpu = NULL; | 633 | perf_ibs->pcpu = NULL; |
@@ -596,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) | |||
596 | 639 | ||
597 | static __init int perf_event_ibs_init(void) | 640 | static __init int perf_event_ibs_init(void) |
598 | { | 641 | { |
642 | struct attribute **attr = ibs_op_format_attrs; | ||
643 | |||
599 | if (!ibs_caps) | 644 | if (!ibs_caps) |
600 | return -ENODEV; /* ibs not supported by the cpu */ | 645 | return -ENODEV; /* ibs not supported by the cpu */ |
601 | 646 | ||
602 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); | 647 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
603 | if (ibs_caps & IBS_CAPS_OPCNT) | 648 | |
649 | if (ibs_caps & IBS_CAPS_OPCNT) { | ||
604 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; | 650 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; |
651 | *attr++ = &format_attr_cnt_ctl.attr; | ||
652 | } | ||
605 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); | 653 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); |
654 | |||
606 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); | 655 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); |
607 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); | 656 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
608 | 657 | ||
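
The new format attributes publish the IBS config-bit layout through sysfs. Roughly (simplified from this era's include/linux/perf_event.h, so treat the expansion as illustrative), PMU_FORMAT_ATTR(rand_en, "config:57") boils down to a read-only device attribute whose show method prints the format string:

static ssize_t rand_en_show(struct device *dev,
                            struct device_attribute *attr, char *page)
{
        return sprintf(page, "config:57\n");
}

static struct device_attribute format_attr_rand_en =
        __ATTR(rand_en, 0444, rand_en_show, NULL);

Once the groups are registered the string shows up as /sys/bus/event_source/devices/ibs_fetch/format/rand_en, which is what lets tools spell events as ibs_fetch/rand_en=1/ and have bit 57 of attr.config filled in for them. Note that cnt_ctl is only linked into ibs_op's attribute list when IBS_CAPS_OPCNT is set, so the file simply does not exist on hardware that cannot honor it.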
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7f2739e03e79..324bb523d9d9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void) | |||
1906 | switch (boot_cpu_data.x86) { | 1906 | switch (boot_cpu_data.x86) { |
1907 | case 0x6: | 1907 | case 0x6: |
1908 | return p6_pmu_init(); | 1908 | return p6_pmu_init(); |
1909 | case 0xb: | ||
1910 | return knc_pmu_init(); | ||
1909 | case 0xf: | 1911 | case 0xf: |
1910 | return p4_pmu_init(); | 1912 | return p4_pmu_init(); |
1911 | } | 1913 | } |
@@ -2008,6 +2010,7 @@ __init int intel_pmu_init(void) | |||
2008 | break; | 2010 | break; |
2009 | 2011 | ||
2010 | case 28: /* Atom */ | 2012 | case 28: /* Atom */ |
2013 | case 54: /* Cedarview */ | ||
2011 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 2014 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
2012 | sizeof(hw_cache_event_ids)); | 2015 | sizeof(hw_cache_event_ids)); |
2013 | 2016 | ||
@@ -2047,7 +2050,6 @@ __init int intel_pmu_init(void) | |||
2047 | case 42: /* SandyBridge */ | 2050 | case 42: /* SandyBridge */ |
2048 | case 45: /* SandyBridge, "Romley-EP" */ | 2051 | case 45: /* SandyBridge, "Romley-EP" */ |
2049 | x86_add_quirk(intel_sandybridge_quirk); | 2052 | x86_add_quirk(intel_sandybridge_quirk); |
2050 | case 58: /* IvyBridge */ | ||
2051 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 2053 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
2052 | sizeof(hw_cache_event_ids)); | 2054 | sizeof(hw_cache_event_ids)); |
2053 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, | 2055 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, |
@@ -2072,6 +2074,29 @@ __init int intel_pmu_init(void) | |||
2072 | 2074 | ||
2073 | pr_cont("SandyBridge events, "); | 2075 | pr_cont("SandyBridge events, "); |
2074 | break; | 2076 | break; |
2077 | case 58: /* IvyBridge */ | ||
2078 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | ||
2079 | sizeof(hw_cache_event_ids)); | ||
2080 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, | ||
2081 | sizeof(hw_cache_extra_regs)); | ||
2082 | |||
2083 | intel_pmu_lbr_init_snb(); | ||
2084 | |||
2085 | x86_pmu.event_constraints = intel_snb_event_constraints; | ||
2086 | x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; | ||
2087 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; | ||
2088 | x86_pmu.extra_regs = intel_snb_extra_regs; | ||
2089 | /* all extra regs are per-cpu when HT is on */ | ||
2090 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
2091 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | ||
2092 | |||
2093 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | ||
2094 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = | ||
2095 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
2096 | |||
2097 | pr_cont("IvyBridge events, "); | ||
2098 | break; | ||
2099 | |||
2075 | 2100 | ||
2076 | default: | 2101 | default: |
2077 | switch (x86_pmu.version) { | 2102 | switch (x86_pmu.version) { |
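
IvyBridge (model 58) previously entered the SandyBridge body just past the quirk call; giving it its own case keeps the shared SNB constraints and LBR setup while swapping in the stricter intel_ivb_pebs_event_constraints. The X86_CONFIG() line packs a raw PERFEVTSEL value; a standalone check of what it encodes, assuming only the architectural bit layout (event 0-7, umask 8-15, inv 23, cmask 24-31):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* UOPS_ISSUED.ANY with cmask=1,inv=1: cycles issuing no uops */
        uint64_t config = 0x0eULL               /* .event */
                        | (0x01ULL << 8)        /* .umask */
                        | (1ULL << 23)          /* .inv   */
                        | (1ULL << 24);         /* .cmask */

        printf("raw config = 0x%llx\n", (unsigned long long)config);
        return 0;                               /* prints 0x180010e */
}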
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e38d97bf4259..826054a4f2ee 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { | |||
407 | EVENT_CONSTRAINT_END | 407 | EVENT_CONSTRAINT_END |
408 | }; | 408 | }; |
409 | 409 | ||
410 | struct event_constraint intel_ivb_pebs_event_constraints[] = { | ||
411 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ | ||
412 | INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ | ||
413 | INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ | ||
414 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ | ||
415 | INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ | ||
416 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ | ||
417 | INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ | ||
418 | INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ | ||
419 | INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ | ||
420 | INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ | ||
421 | EVENT_CONSTRAINT_END | ||
422 | }; | ||
423 | |||
410 | struct event_constraint *intel_pebs_constraints(struct perf_event *event) | 424 | struct event_constraint *intel_pebs_constraints(struct perf_event *event) |
411 | { | 425 | { |
412 | struct event_constraint *c; | 426 | struct event_constraint *c; |
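
For reading the new table: INTEL_UEVENT_CONSTRAINT takes (umask << 8) | event_select plus a bitmask of the general-purpose counters the event may be scheduled on, while INTEL_EVENT_CONSTRAINT matches on the event select alone. A small standalone decoder for the pairs (a sanity aid, not kernel code):

#include <stdint.h>
#include <stdio.h>

static void decode(uint16_t code, uint8_t cntmask)
{
        printf("event 0x%02x umask 0x%02x -> counter mask 0x%x\n",
               code & 0xff, code >> 8, cntmask);
}

int main(void)
{
        decode(0x01c0, 0x2);    /* INST_RETIRED.PRECDIST: counter 1 only */
        decode(0x00cd, 0x8);    /* MEM_TRANS_RETIRED.* (event-only match,
                                   any umask): counter 3 only */
        return 0;
}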
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 520b4265fcd2..da02e9cc3754 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void) | |||
686 | * to have an operational LBR which can freeze | 686 | * to have an operational LBR which can freeze |
687 | * on PMU interrupt | 687 | * on PMU interrupt |
688 | */ | 688 | */ |
689 | if (boot_cpu_data.x86_mask < 10) { | 689 | if (boot_cpu_data.x86_model == 28 |
690 | && boot_cpu_data.x86_mask < 10) { | ||
690 | pr_cont("LBR disabled due to erratum"); | 691 | pr_cont("LBR disabled due to erratum"); |
691 | return; | 692 | return; |
692 | } | 693 | } |
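
intel_pmu_lbr_init_atom() is now reached by both model 28 (Bonnell, per the intel_pmu_init() change above) and model 54 (Cedarview), and stepping numbers restart with each model, so the erratum test must be qualified by model or it would wrongly disable LBR on early Cedarview steppings. Restated as a standalone predicate:

/* the LBR freeze erratum only afflicts pre-stepping-10 model-28 parts */
int atom_lbr_erratum(int model, int stepping)
{
        return model == 28 && stepping < 10;
}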
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e74..99d96a4978b5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) | |||
661 | } | 661 | } |
662 | } | 662 | } |
663 | 663 | ||
664 | static struct uncore_event_desc snb_uncore_events[] = { | ||
665 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), | ||
666 | { /* end: all zeroes */ }, | ||
667 | }; | ||
668 | |||
664 | static struct attribute *snb_uncore_formats_attr[] = { | 669 | static struct attribute *snb_uncore_formats_attr[] = { |
665 | &format_attr_event.attr, | 670 | &format_attr_event.attr, |
666 | &format_attr_umask.attr, | 671 | &format_attr_umask.attr, |
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = { | |||
704 | .constraints = snb_uncore_cbox_constraints, | 709 | .constraints = snb_uncore_cbox_constraints, |
705 | .ops = &snb_uncore_msr_ops, | 710 | .ops = &snb_uncore_msr_ops, |
706 | .format_group = &snb_uncore_format_group, | 711 | .format_group = &snb_uncore_format_group, |
712 | .event_descs = snb_uncore_events, | ||
707 | }; | 713 | }; |
708 | 714 | ||
709 | static struct intel_uncore_type *snb_msr_uncores[] = { | 715 | static struct intel_uncore_type *snb_msr_uncores[] = { |
@@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp | |||
1944 | static struct intel_uncore_box * | 1950 | static struct intel_uncore_box * |
1945 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | 1951 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) |
1946 | { | 1952 | { |
1947 | static struct intel_uncore_box *box; | 1953 | struct intel_uncore_box *box; |
1948 | 1954 | ||
1949 | box = *per_cpu_ptr(pmu->box, cpu); | 1955 | box = *per_cpu_ptr(pmu->box, cpu); |
1950 | if (box) | 1956 | if (box) |
@@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event) | |||
2341 | return ret; | 2347 | return ret; |
2342 | } | 2348 | } |
2343 | 2349 | ||
2350 | static ssize_t uncore_get_attr_cpumask(struct device *dev, | ||
2351 | struct device_attribute *attr, char *buf) | ||
2352 | { | ||
2353 | int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); | ||
2354 | |||
2355 | buf[n++] = '\n'; | ||
2356 | buf[n] = '\0'; | ||
2357 | return n; | ||
2358 | } | ||
2359 | |||
2360 | static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); | ||
2361 | |||
2362 | static struct attribute *uncore_pmu_attrs[] = { | ||
2363 | &dev_attr_cpumask.attr, | ||
2364 | NULL, | ||
2365 | }; | ||
2366 | |||
2367 | static struct attribute_group uncore_pmu_attr_group = { | ||
2368 | .attrs = uncore_pmu_attrs, | ||
2369 | }; | ||
2370 | |||
2344 | static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) | 2371 | static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) |
2345 | { | 2372 | { |
2346 | int ret; | 2373 | int ret; |
@@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) | |||
2378 | free_percpu(type->pmus[i].box); | 2405 | free_percpu(type->pmus[i].box); |
2379 | kfree(type->pmus); | 2406 | kfree(type->pmus); |
2380 | type->pmus = NULL; | 2407 | type->pmus = NULL; |
2381 | kfree(type->attr_groups[1]); | 2408 | kfree(type->events_group); |
2382 | type->attr_groups[1] = NULL; | 2409 | type->events_group = NULL; |
2383 | } | 2410 | } |
2384 | 2411 | ||
2385 | static void __init uncore_types_exit(struct intel_uncore_type **types) | 2412 | static void __init uncore_types_exit(struct intel_uncore_type **types) |
@@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) | |||
2431 | for (j = 0; j < i; j++) | 2458 | for (j = 0; j < i; j++) |
2432 | attrs[j] = &type->event_descs[j].attr.attr; | 2459 | attrs[j] = &type->event_descs[j].attr.attr; |
2433 | 2460 | ||
2434 | type->attr_groups[1] = events_group; | 2461 | type->events_group = events_group; |
2435 | } | 2462 | } |
2436 | 2463 | ||
2464 | type->pmu_group = &uncore_pmu_attr_group; | ||
2437 | type->pmus = pmus; | 2465 | type->pmus = pmus; |
2438 | return 0; | 2466 | return 0; |
2439 | fail: | 2467 | fail: |
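
The new cpumask attribute tells user space which single CPU per package uncore counting is routed to, so tools do not double-count by opening the same event on every CPU. A minimal user-space reader, assuming a SandyBridge-style uncore_cbox_0 PMU is registered (the path is illustrative):

#include <stdio.h>

int main(void)
{
        char buf[256];
        FILE *f = fopen("/sys/bus/event_source/devices/uncore_cbox_0/cpumask", "r");

        if (!f)
                return 1;
        if (fgets(buf, sizeof(buf), f))
                printf("count uncore events on CPUs: %s", buf);
        fclose(f);
        return 0;
}

The snb_uncore_events table added in the same file similarly publishes a named clockticks alias (event=0xff,umask=0x00) under the PMU's events directory.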
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aac..e68a4550e952 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -369,10 +369,12 @@ struct intel_uncore_type { | |||
369 | struct intel_uncore_pmu *pmus; | 369 | struct intel_uncore_pmu *pmus; |
370 | struct intel_uncore_ops *ops; | 370 | struct intel_uncore_ops *ops; |
371 | struct uncore_event_desc *event_descs; | 371 | struct uncore_event_desc *event_descs; |
372 | const struct attribute_group *attr_groups[3]; | 372 | const struct attribute_group *attr_groups[4]; |
373 | }; | 373 | }; |
374 | 374 | ||
375 | #define format_group attr_groups[0] | 375 | #define pmu_group attr_groups[0] |
376 | #define format_group attr_groups[1] | ||
377 | #define events_group attr_groups[2] | ||
376 | 378 | ||
377 | struct intel_uncore_ops { | 379 | struct intel_uncore_ops { |
378 | void (*init_box)(struct intel_uncore_box *); | 380 | void (*init_box)(struct intel_uncore_box *); |
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c new file mode 100644 index 000000000000..7c46bfdbc373 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_knc.c | |||
@@ -0,0 +1,248 @@ | |||
1 | /* Driver for Intel Xeon Phi "Knights Corner" PMU */ | ||
2 | |||
3 | #include <linux/perf_event.h> | ||
4 | #include <linux/types.h> | ||
5 | |||
6 | #include "perf_event.h" | ||
7 | |||
8 | static const u64 knc_perfmon_event_map[] = | ||
9 | { | ||
10 | [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, | ||
11 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, | ||
12 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, | ||
13 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, | ||
14 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, | ||
15 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, | ||
16 | }; | ||
17 | |||
18 | static __initconst u64 knc_hw_cache_event_ids | ||
19 | [PERF_COUNT_HW_CACHE_MAX] | ||
20 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
21 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
22 | { | ||
23 | [ C(L1D) ] = { | ||
24 | [ C(OP_READ) ] = { | ||
25 | /* On Xeon Phi event "0" is a valid DATA_READ */ | ||
26 | /* (L1 Data Cache Reads) Instruction. */ | ||
27 | /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ | ||
28 | /* bit will always be set in x86_pmu_hw_config(). */ | ||
29 | [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | ||
30 | /* DATA_READ */ | ||
31 | [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ | ||
32 | }, | ||
33 | [ C(OP_WRITE) ] = { | ||
34 | [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | ||
35 | [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ | ||
36 | }, | ||
37 | [ C(OP_PREFETCH) ] = { | ||
38 | [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ | ||
39 | [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ | ||
40 | }, | ||
41 | }, | ||
42 | [ C(L1I ) ] = { | ||
43 | [ C(OP_READ) ] = { | ||
44 | [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | ||
45 | [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ | ||
46 | }, | ||
47 | [ C(OP_WRITE) ] = { | ||
48 | [ C(RESULT_ACCESS) ] = -1, | ||
49 | [ C(RESULT_MISS) ] = -1, | ||
50 | }, | ||
51 | [ C(OP_PREFETCH) ] = { | ||
52 | [ C(RESULT_ACCESS) ] = 0x0, | ||
53 | [ C(RESULT_MISS) ] = 0x0, | ||
54 | }, | ||
55 | }, | ||
56 | [ C(LL ) ] = { | ||
57 | [ C(OP_READ) ] = { | ||
58 | [ C(RESULT_ACCESS) ] = 0, | ||
59 | [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ | ||
60 | }, | ||
61 | [ C(OP_WRITE) ] = { | ||
62 | [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ | ||
63 | [ C(RESULT_MISS) ] = 0, | ||
64 | }, | ||
65 | [ C(OP_PREFETCH) ] = { | ||
66 | [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ | ||
67 | [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ | ||
68 | }, | ||
69 | }, | ||
70 | [ C(DTLB) ] = { | ||
71 | [ C(OP_READ) ] = { | ||
72 | [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | ||
73 | /* DATA_READ */ | ||
74 | /* see note on L1 OP_READ */ | ||
75 | [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | ||
76 | }, | ||
77 | [ C(OP_WRITE) ] = { | ||
78 | [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | ||
79 | [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | ||
80 | }, | ||
81 | [ C(OP_PREFETCH) ] = { | ||
82 | [ C(RESULT_ACCESS) ] = 0x0, | ||
83 | [ C(RESULT_MISS) ] = 0x0, | ||
84 | }, | ||
85 | }, | ||
86 | [ C(ITLB) ] = { | ||
87 | [ C(OP_READ) ] = { | ||
88 | [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | ||
89 | [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ | ||
90 | }, | ||
91 | [ C(OP_WRITE) ] = { | ||
92 | [ C(RESULT_ACCESS) ] = -1, | ||
93 | [ C(RESULT_MISS) ] = -1, | ||
94 | }, | ||
95 | [ C(OP_PREFETCH) ] = { | ||
96 | [ C(RESULT_ACCESS) ] = -1, | ||
97 | [ C(RESULT_MISS) ] = -1, | ||
98 | }, | ||
99 | }, | ||
100 | [ C(BPU ) ] = { | ||
101 | [ C(OP_READ) ] = { | ||
102 | [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ | ||
103 | [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ | ||
104 | }, | ||
105 | [ C(OP_WRITE) ] = { | ||
106 | [ C(RESULT_ACCESS) ] = -1, | ||
107 | [ C(RESULT_MISS) ] = -1, | ||
108 | }, | ||
109 | [ C(OP_PREFETCH) ] = { | ||
110 | [ C(RESULT_ACCESS) ] = -1, | ||
111 | [ C(RESULT_MISS) ] = -1, | ||
112 | }, | ||
113 | }, | ||
114 | }; | ||
115 | |||
116 | |||
117 | static u64 knc_pmu_event_map(int hw_event) | ||
118 | { | ||
119 | return knc_perfmon_event_map[hw_event]; | ||
120 | } | ||
121 | |||
122 | static struct event_constraint knc_event_constraints[] = | ||
123 | { | ||
124 | INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ | ||
125 | INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ | ||
126 | INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ | ||
127 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ | ||
128 | INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ | ||
129 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ | ||
130 | INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ | ||
131 | INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ | ||
132 | INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ | ||
133 | INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ | ||
134 | INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ | ||
135 | INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ | ||
136 | INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ | ||
137 | INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ | ||
138 | INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ | ||
139 | INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ | ||
140 | INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ | ||
141 | INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ | ||
142 | INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ | ||
143 | INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ | ||
144 | INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ | ||
145 | EVENT_CONSTRAINT_END | ||
146 | }; | ||
147 | |||
148 | #define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d | ||
149 | #define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e | ||
150 | #define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f | ||
151 | |||
152 | #define KNC_ENABLE_COUNTER0 0x00000001 | ||
153 | #define KNC_ENABLE_COUNTER1 0x00000002 | ||
154 | |||
155 | static void knc_pmu_disable_all(void) | ||
156 | { | ||
157 | u64 val; | ||
158 | |||
159 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
160 | val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | ||
161 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
162 | } | ||
163 | |||
164 | static void knc_pmu_enable_all(int added) | ||
165 | { | ||
166 | u64 val; | ||
167 | |||
168 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
169 | val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | ||
170 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
171 | } | ||
172 | |||
173 | static inline void | ||
174 | knc_pmu_disable_event(struct perf_event *event) | ||
175 | { | ||
176 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
177 | struct hw_perf_event *hwc = &event->hw; | ||
178 | u64 val; | ||
179 | |||
180 | val = hwc->config; | ||
181 | if (cpuc->enabled) | ||
182 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
183 | |||
184 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | ||
185 | } | ||
186 | |||
187 | static void knc_pmu_enable_event(struct perf_event *event) | ||
188 | { | ||
189 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
190 | struct hw_perf_event *hwc = &event->hw; | ||
191 | u64 val; | ||
192 | |||
193 | val = hwc->config; | ||
194 | if (cpuc->enabled) | ||
195 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
196 | |||
197 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | ||
198 | } | ||
199 | |||
200 | PMU_FORMAT_ATTR(event, "config:0-7" ); | ||
201 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | ||
202 | PMU_FORMAT_ATTR(edge, "config:18" ); | ||
203 | PMU_FORMAT_ATTR(inv, "config:23" ); | ||
204 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); | ||
205 | |||
206 | static struct attribute *intel_knc_formats_attr[] = { | ||
207 | &format_attr_event.attr, | ||
208 | &format_attr_umask.attr, | ||
209 | &format_attr_edge.attr, | ||
210 | &format_attr_inv.attr, | ||
211 | &format_attr_cmask.attr, | ||
212 | NULL, | ||
213 | }; | ||
214 | |||
215 | static __initconst struct x86_pmu knc_pmu = { | ||
216 | .name = "knc", | ||
217 | .handle_irq = x86_pmu_handle_irq, | ||
218 | .disable_all = knc_pmu_disable_all, | ||
219 | .enable_all = knc_pmu_enable_all, | ||
220 | .enable = knc_pmu_enable_event, | ||
221 | .disable = knc_pmu_disable_event, | ||
222 | .hw_config = x86_pmu_hw_config, | ||
223 | .schedule_events = x86_schedule_events, | ||
224 | .eventsel = MSR_KNC_EVNTSEL0, | ||
225 | .perfctr = MSR_KNC_PERFCTR0, | ||
226 | .event_map = knc_pmu_event_map, | ||
227 | .max_events = ARRAY_SIZE(knc_perfmon_event_map), | ||
228 | .apic = 1, | ||
229 | .max_period = (1ULL << 31) - 1, | ||
230 | .version = 0, | ||
231 | .num_counters = 2, | ||
232 | /* in theory 40 bits, early silicon is buggy though */ | ||
233 | .cntval_bits = 32, | ||
234 | .cntval_mask = (1ULL << 32) - 1, | ||
235 | .get_event_constraints = x86_get_event_constraints, | ||
236 | .event_constraints = knc_event_constraints, | ||
237 | .format_attrs = intel_knc_formats_attr, | ||
238 | }; | ||
239 | |||
240 | __init int knc_pmu_init(void) | ||
241 | { | ||
242 | x86_pmu = knc_pmu; | ||
243 | |||
244 | memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, | ||
245 | sizeof(hw_cache_event_ids)); | ||
246 | |||
247 | return 0; | ||
248 | } | ||
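
Declaring cntval_bits = 32 rather than the nominal 40 matters because the generic x86 code computes counter deltas modulo 2^cntval_bits by shifting both samples up to bit 63 and back down; with buggy upper bits on early silicon, 32 bits is the safe window, and max_period = (1ULL << 31) - 1 keeps sampling inside it. A standalone model of that delta arithmetic (mirroring x86_perf_event_update(), simplified):

#include <stdint.h>
#include <stdio.h>

static int64_t counter_delta(uint64_t prev, uint64_t now, int cntval_bits)
{
        int shift = 64 - cntval_bits;

        /*
         * Shifting both samples to the top sign-extends them from
         * cntval_bits, so wraparound (now < prev) still produces the
         * correct positive delta after the arithmetic shift back.
         */
        return ((int64_t)(now << shift) - (int64_t)(prev << shift)) >> shift;
}

int main(void)
{
        /* counter wrapped from near the top of 32 bits down to 5 */
        printf("%lld\n",
               (long long)counter_delta(0xfffffffbULL, 5, 32)); /* 10 */
        return 0;
}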
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 966512b2cacf..2e8caf03f593 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | |||
56 | switch (boot_cpu_data.x86) { | 56 | switch (boot_cpu_data.x86) { |
57 | case 6: | 57 | case 6: |
58 | return msr - MSR_P6_PERFCTR0; | 58 | return msr - MSR_P6_PERFCTR0; |
59 | case 11: | ||
60 | return msr - MSR_KNC_PERFCTR0; | ||
59 | case 15: | 61 | case 15: |
60 | return msr - MSR_P4_BPU_PERFCTR0; | 62 | return msr - MSR_P4_BPU_PERFCTR0; |
61 | } | 63 | } |
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | |||
82 | switch (boot_cpu_data.x86) { | 84 | switch (boot_cpu_data.x86) { |
83 | case 6: | 85 | case 6: |
84 | return msr - MSR_P6_EVNTSEL0; | 86 | return msr - MSR_P6_EVNTSEL0; |
87 | case 11: | ||
88 | return msr - MSR_KNC_EVNTSEL0; | ||
85 | case 15: | 89 | case 15: |
86 | return msr - MSR_P4_BSU_ESCR0; | 90 | return msr - MSR_P4_BSU_ESCR0; |
87 | } | 91 | } |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c6681485..fbd895562292 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
140 | 140 | ||
141 | static void *c_start(struct seq_file *m, loff_t *pos) | 141 | static void *c_start(struct seq_file *m, loff_t *pos) |
142 | { | 142 | { |
143 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 143 | *pos = cpumask_next(*pos - 1, cpu_online_mask); |
144 | *pos = cpumask_first(cpu_online_mask); | ||
145 | else | ||
146 | *pos = cpumask_next(*pos - 1, cpu_online_mask); | ||
147 | if ((*pos) < nr_cpu_ids) | 144 | if ((*pos) < nr_cpu_ids) |
148 | return &cpu_data(*pos); | 145 | return &cpu_data(*pos); |
149 | return NULL; | 146 | return NULL; |
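
The dropped special case in c_start() was dead weight: cpumask_next() returns the first set bit strictly after the CPU it is given, so at *pos == 0 the argument *pos - 1 == -1 scans from the beginning, which is exactly what cpumask_first() does. A standalone model of that convention:

#include <stdio.h>

/* mimics cpumask_next(after, mask): first set bit strictly after 'after' */
static int next_cpu(long after, unsigned long mask, int nbits)
{
        long cpu;

        for (cpu = after + 1; cpu < nbits; cpu++)
                if (mask & (1UL << cpu))
                        return cpu;
        return nbits;                   /* none left (>= nr_cpu_ids) */
}

int main(void)
{
        unsigned long online = 0xd;     /* CPUs 0, 2 and 3 online */

        printf("%d\n", next_cpu(-1, online, 4));        /* 0: the first */
        printf("%d\n", next_cpu(0, online, 4));         /* 2 */
        return 0;
}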
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323f..60c78917190c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -199,12 +199,14 @@ static int __init cpuid_init(void) | |||
199 | goto out_chrdev; | 199 | goto out_chrdev; |
200 | } | 200 | } |
201 | cpuid_class->devnode = cpuid_devnode; | 201 | cpuid_class->devnode = cpuid_devnode; |
202 | get_online_cpus(); | ||
202 | for_each_online_cpu(i) { | 203 | for_each_online_cpu(i) { |
203 | err = cpuid_device_create(i); | 204 | err = cpuid_device_create(i); |
204 | if (err != 0) | 205 | if (err != 0) |
205 | goto out_class; | 206 | goto out_class; |
206 | } | 207 | } |
207 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); | 208 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); |
209 | put_online_cpus(); | ||
208 | 210 | ||
209 | err = 0; | 211 | err = 0; |
210 | goto out; | 212 | goto out; |
@@ -214,6 +216,7 @@ out_class: | |||
214 | for_each_online_cpu(i) { | 216 | for_each_online_cpu(i) { |
215 | cpuid_device_destroy(i); | 217 | cpuid_device_destroy(i); |
216 | } | 218 | } |
219 | put_online_cpus(); | ||
217 | class_destroy(cpuid_class); | 220 | class_destroy(cpuid_class); |
218 | out_chrdev: | 221 | out_chrdev: |
219 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); | 222 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) | |||
225 | { | 228 | { |
226 | int cpu = 0; | 229 | int cpu = 0; |
227 | 230 | ||
231 | get_online_cpus(); | ||
228 | for_each_online_cpu(cpu) | 232 | for_each_online_cpu(cpu) |
229 | cpuid_device_destroy(cpu); | 233 | cpuid_device_destroy(cpu); |
230 | class_destroy(cpuid_class); | 234 | class_destroy(cpuid_class); |
231 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); | 235 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
232 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); | 236 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); |
237 | put_online_cpus(); | ||
233 | } | 238 | } |
234 | 239 | ||
235 | module_init(cpuid_init); | 240 | module_init(cpuid_init); |
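
Both cpuid.c hunks apply the same hotplug-safety bracket: pin the set of online CPUs, act on each one, register (or tear down and unregister) the notifier, then unpin. Without it, a CPU hotplugged between the loop and register_hotcpu_notifier() would get no device at all. The pattern in the abstract (kernel pseudocode, not compilable on its own):

get_online_cpus();              /* CPUs cannot come or go past here */
for_each_online_cpu(i)
        act_on(i);              /* create or destroy the per-CPU node */
register_hotcpu_notifier(&nb);  /* future hotplug handled by callback */
put_online_cpus();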
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a874..b1581527a236 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { | |||
342 | .xlate = ioapic_xlate, | 342 | .xlate = ioapic_xlate, |
343 | }; | 343 | }; |
344 | 344 | ||
345 | static void dt_add_ioapic_domain(unsigned int ioapic_num, | ||
346 | struct device_node *np) | ||
347 | { | ||
348 | struct irq_domain *id; | ||
349 | struct mp_ioapic_gsi *gsi_cfg; | ||
350 | int ret; | ||
351 | int num; | ||
352 | |||
353 | gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); | ||
354 | num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; | ||
355 | |||
356 | id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, | ||
357 | (void *)ioapic_num); | ||
358 | BUG_ON(!id); | ||
359 | if (gsi_cfg->gsi_base == 0) { | ||
360 | /* | ||
361 | * The first NR_IRQS_LEGACY irq descs are allocated in | ||
362 | * early_irq_init() and need just a mapping. The | ||
363 | * remaining irqs need both. All of them are preallocated | ||
364 | * and assigned so we can keep the 1:1 mapping which the ioapic | ||
365 | * is having. | ||
366 | */ | ||
367 | ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); | ||
368 | if (ret) | ||
369 | pr_err("Error mapping legacy IRQs: %d\n", ret); | ||
370 | |||
371 | if (num > NR_IRQS_LEGACY) { | ||
372 | ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, | ||
373 | NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); | ||
374 | if (ret) | ||
375 | pr_err("Error creating mapping for the " | ||
376 | "remaining IRQs: %d\n", ret); | ||
377 | } | ||
378 | irq_set_default_host(id); | ||
379 | } else { | ||
380 | ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); | ||
381 | if (ret) | ||
382 | pr_err("Error creating IRQ mapping: %d\n", ret); | ||
383 | } | ||
384 | } | ||
385 | |||
345 | static void __init ioapic_add_ofnode(struct device_node *np) | 386 | static void __init ioapic_add_ofnode(struct device_node *np) |
346 | { | 387 | { |
347 | struct resource r; | 388 | struct resource r; |
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) | |||
356 | 397 | ||
357 | for (i = 0; i < nr_ioapics; i++) { | 398 | for (i = 0; i < nr_ioapics; i++) { |
358 | if (r.start == mpc_ioapic_addr(i)) { | 399 | if (r.start == mpc_ioapic_addr(i)) { |
359 | struct irq_domain *id; | 400 | dt_add_ioapic_domain(i, np); |
360 | struct mp_ioapic_gsi *gsi_cfg; | ||
361 | |||
362 | gsi_cfg = mp_ioapic_gsi_routing(i); | ||
363 | |||
364 | id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, | ||
365 | &ioapic_irq_domain_ops, | ||
366 | (void*)i); | ||
367 | BUG_ON(!id); | ||
368 | return; | 401 | return; |
369 | } | 402 | } |
370 | } | 403 | } |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8f8e8eea9085..88b725aa1d52 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <asm/cpufeature.h> | 57 | #include <asm/cpufeature.h> |
58 | #include <asm/alternative-asm.h> | 58 | #include <asm/alternative-asm.h> |
59 | #include <asm/asm.h> | 59 | #include <asm/asm.h> |
60 | #include <asm/smap.h> | ||
60 | 61 | ||
61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 62 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
62 | #include <linux/elf-em.h> | 63 | #include <linux/elf-em.h> |
@@ -298,6 +299,21 @@ ENTRY(ret_from_fork) | |||
298 | CFI_ENDPROC | 299 | CFI_ENDPROC |
299 | END(ret_from_fork) | 300 | END(ret_from_fork) |
300 | 301 | ||
302 | ENTRY(ret_from_kernel_thread) | ||
303 | CFI_STARTPROC | ||
304 | pushl_cfi %eax | ||
305 | call schedule_tail | ||
306 | GET_THREAD_INFO(%ebp) | ||
307 | popl_cfi %eax | ||
308 | pushl_cfi $0x0202 # Reset kernel eflags | ||
309 | popfl_cfi | ||
310 | movl PT_EBP(%esp),%eax | ||
311 | call *PT_EBX(%esp) | ||
312 | movl $0,PT_EAX(%esp) | ||
313 | jmp syscall_exit | ||
314 | CFI_ENDPROC | ||
315 | ENDPROC(ret_from_kernel_thread) | ||
316 | |||
301 | /* | 317 | /* |
302 | * Interrupt exit functions should be protected against kprobes | 318 | * Interrupt exit functions should be protected against kprobes |
303 | */ | 319 | */ |
@@ -322,8 +338,7 @@ ret_from_intr: | |||
322 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax | 338 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax |
323 | #else | 339 | #else |
324 | /* | 340 | /* |
325 | * We can be coming here from a syscall done in the kernel space, | 341 | * We can be coming here from a child spawned by kernel_thread(). |
326 | * e.g. a failed kernel_execve(). | ||
327 | */ | 342 | */ |
328 | movl PT_CS(%esp), %eax | 343 | movl PT_CS(%esp), %eax |
329 | andl $SEGMENT_RPL_MASK, %eax | 344 | andl $SEGMENT_RPL_MASK, %eax |
@@ -407,7 +422,9 @@ sysenter_past_esp: | |||
407 | */ | 422 | */ |
408 | cmpl $__PAGE_OFFSET-3,%ebp | 423 | cmpl $__PAGE_OFFSET-3,%ebp |
409 | jae syscall_fault | 424 | jae syscall_fault |
425 | ASM_STAC | ||
410 | 1: movl (%ebp),%ebp | 426 | 1: movl (%ebp),%ebp |
427 | ASM_CLAC | ||
411 | movl %ebp,PT_EBP(%esp) | 428 | movl %ebp,PT_EBP(%esp) |
412 | _ASM_EXTABLE(1b,syscall_fault) | 429 | _ASM_EXTABLE(1b,syscall_fault) |
413 | 430 | ||
@@ -488,6 +505,7 @@ ENDPROC(ia32_sysenter_target) | |||
488 | # system call handler stub | 505 | # system call handler stub |
489 | ENTRY(system_call) | 506 | ENTRY(system_call) |
490 | RING0_INT_FRAME # can't unwind into user space anyway | 507 | RING0_INT_FRAME # can't unwind into user space anyway |
508 | ASM_CLAC | ||
491 | pushl_cfi %eax # save orig_eax | 509 | pushl_cfi %eax # save orig_eax |
492 | SAVE_ALL | 510 | SAVE_ALL |
493 | GET_THREAD_INFO(%ebp) | 511 | GET_THREAD_INFO(%ebp) |
@@ -612,6 +630,10 @@ work_notifysig: # deal with pending signals and | |||
612 | movl %esp, %eax | 630 | movl %esp, %eax |
613 | jne work_notifysig_v86 # returning to kernel-space or | 631 | jne work_notifysig_v86 # returning to kernel-space or |
614 | # vm86-space | 632 | # vm86-space |
633 | 1: | ||
634 | #else | ||
635 | movl %esp, %eax | ||
636 | #endif | ||
615 | TRACE_IRQS_ON | 637 | TRACE_IRQS_ON |
616 | ENABLE_INTERRUPTS(CLBR_NONE) | 638 | ENABLE_INTERRUPTS(CLBR_NONE) |
617 | movb PT_CS(%esp), %bl | 639 | movb PT_CS(%esp), %bl |
@@ -622,24 +644,15 @@ work_notifysig: # deal with pending signals and | |||
622 | call do_notify_resume | 644 | call do_notify_resume |
623 | jmp resume_userspace | 645 | jmp resume_userspace |
624 | 646 | ||
647 | #ifdef CONFIG_VM86 | ||
625 | ALIGN | 648 | ALIGN |
626 | work_notifysig_v86: | 649 | work_notifysig_v86: |
627 | pushl_cfi %ecx # save ti_flags for do_notify_resume | 650 | pushl_cfi %ecx # save ti_flags for do_notify_resume |
628 | call save_v86_state # %eax contains pt_regs pointer | 651 | call save_v86_state # %eax contains pt_regs pointer |
629 | popl_cfi %ecx | 652 | popl_cfi %ecx |
630 | movl %eax, %esp | 653 | movl %eax, %esp |
631 | #else | 654 | jmp 1b |
632 | movl %esp, %eax | ||
633 | #endif | 655 | #endif |
634 | TRACE_IRQS_ON | ||
635 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
636 | movb PT_CS(%esp), %bl | ||
637 | andb $SEGMENT_RPL_MASK, %bl | ||
638 | cmpb $USER_RPL, %bl | ||
639 | jb resume_kernel | ||
640 | xorl %edx, %edx | ||
641 | call do_notify_resume | ||
642 | jmp resume_userspace | ||
643 | END(work_pending) | 656 | END(work_pending) |
644 | 657 | ||
645 | # perform syscall exit tracing | 658 | # perform syscall exit tracing |
@@ -670,6 +683,7 @@ END(syscall_exit_work) | |||
670 | 683 | ||
671 | RING0_INT_FRAME # can't unwind into user space anyway | 684 | RING0_INT_FRAME # can't unwind into user space anyway |
672 | syscall_fault: | 685 | syscall_fault: |
686 | ASM_CLAC | ||
673 | GET_THREAD_INFO(%ebp) | 687 | GET_THREAD_INFO(%ebp) |
674 | movl $-EFAULT,PT_EAX(%esp) | 688 | movl $-EFAULT,PT_EAX(%esp) |
675 | jmp resume_userspace | 689 | jmp resume_userspace |
@@ -727,7 +741,6 @@ ENDPROC(ptregs_##name) | |||
727 | PTREGSCALL1(iopl) | 741 | PTREGSCALL1(iopl) |
728 | PTREGSCALL0(fork) | 742 | PTREGSCALL0(fork) |
729 | PTREGSCALL0(vfork) | 743 | PTREGSCALL0(vfork) |
730 | PTREGSCALL3(execve) | ||
731 | PTREGSCALL2(sigaltstack) | 744 | PTREGSCALL2(sigaltstack) |
732 | PTREGSCALL0(sigreturn) | 745 | PTREGSCALL0(sigreturn) |
733 | PTREGSCALL0(rt_sigreturn) | 746 | PTREGSCALL0(rt_sigreturn) |
@@ -825,6 +838,7 @@ END(interrupt) | |||
825 | */ | 838 | */ |
826 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 839 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
827 | common_interrupt: | 840 | common_interrupt: |
841 | ASM_CLAC | ||
828 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | 842 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ |
829 | SAVE_ALL | 843 | SAVE_ALL |
830 | TRACE_IRQS_OFF | 844 | TRACE_IRQS_OFF |
@@ -841,6 +855,7 @@ ENDPROC(common_interrupt) | |||
841 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 855 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
842 | ENTRY(name) \ | 856 | ENTRY(name) \ |
843 | RING0_INT_FRAME; \ | 857 | RING0_INT_FRAME; \ |
858 | ASM_CLAC; \ | ||
844 | pushl_cfi $~(nr); \ | 859 | pushl_cfi $~(nr); \ |
845 | SAVE_ALL; \ | 860 | SAVE_ALL; \ |
846 | TRACE_IRQS_OFF \ | 861 | TRACE_IRQS_OFF \ |
@@ -857,6 +872,7 @@ ENDPROC(name) | |||
857 | 872 | ||
858 | ENTRY(coprocessor_error) | 873 | ENTRY(coprocessor_error) |
859 | RING0_INT_FRAME | 874 | RING0_INT_FRAME |
875 | ASM_CLAC | ||
860 | pushl_cfi $0 | 876 | pushl_cfi $0 |
861 | pushl_cfi $do_coprocessor_error | 877 | pushl_cfi $do_coprocessor_error |
862 | jmp error_code | 878 | jmp error_code |
@@ -865,6 +881,7 @@ END(coprocessor_error) | |||
865 | 881 | ||
866 | ENTRY(simd_coprocessor_error) | 882 | ENTRY(simd_coprocessor_error) |
867 | RING0_INT_FRAME | 883 | RING0_INT_FRAME |
884 | ASM_CLAC | ||
868 | pushl_cfi $0 | 885 | pushl_cfi $0 |
869 | #ifdef CONFIG_X86_INVD_BUG | 886 | #ifdef CONFIG_X86_INVD_BUG |
870 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | 887 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ |
@@ -886,6 +903,7 @@ END(simd_coprocessor_error) | |||
886 | 903 | ||
887 | ENTRY(device_not_available) | 904 | ENTRY(device_not_available) |
888 | RING0_INT_FRAME | 905 | RING0_INT_FRAME |
906 | ASM_CLAC | ||
889 | pushl_cfi $-1 # mark this as an int | 907 | pushl_cfi $-1 # mark this as an int |
890 | pushl_cfi $do_device_not_available | 908 | pushl_cfi $do_device_not_available |
891 | jmp error_code | 909 | jmp error_code |
@@ -906,6 +924,7 @@ END(native_irq_enable_sysexit) | |||
906 | 924 | ||
907 | ENTRY(overflow) | 925 | ENTRY(overflow) |
908 | RING0_INT_FRAME | 926 | RING0_INT_FRAME |
927 | ASM_CLAC | ||
909 | pushl_cfi $0 | 928 | pushl_cfi $0 |
910 | pushl_cfi $do_overflow | 929 | pushl_cfi $do_overflow |
911 | jmp error_code | 930 | jmp error_code |
@@ -914,6 +933,7 @@ END(overflow) | |||
914 | 933 | ||
915 | ENTRY(bounds) | 934 | ENTRY(bounds) |
916 | RING0_INT_FRAME | 935 | RING0_INT_FRAME |
936 | ASM_CLAC | ||
917 | pushl_cfi $0 | 937 | pushl_cfi $0 |
918 | pushl_cfi $do_bounds | 938 | pushl_cfi $do_bounds |
919 | jmp error_code | 939 | jmp error_code |
@@ -922,6 +942,7 @@ END(bounds) | |||
922 | 942 | ||
923 | ENTRY(invalid_op) | 943 | ENTRY(invalid_op) |
924 | RING0_INT_FRAME | 944 | RING0_INT_FRAME |
945 | ASM_CLAC | ||
925 | pushl_cfi $0 | 946 | pushl_cfi $0 |
926 | pushl_cfi $do_invalid_op | 947 | pushl_cfi $do_invalid_op |
927 | jmp error_code | 948 | jmp error_code |
@@ -930,6 +951,7 @@ END(invalid_op) | |||
930 | 951 | ||
931 | ENTRY(coprocessor_segment_overrun) | 952 | ENTRY(coprocessor_segment_overrun) |
932 | RING0_INT_FRAME | 953 | RING0_INT_FRAME |
954 | ASM_CLAC | ||
933 | pushl_cfi $0 | 955 | pushl_cfi $0 |
934 | pushl_cfi $do_coprocessor_segment_overrun | 956 | pushl_cfi $do_coprocessor_segment_overrun |
935 | jmp error_code | 957 | jmp error_code |
@@ -938,6 +960,7 @@ END(coprocessor_segment_overrun) | |||
938 | 960 | ||
939 | ENTRY(invalid_TSS) | 961 | ENTRY(invalid_TSS) |
940 | RING0_EC_FRAME | 962 | RING0_EC_FRAME |
963 | ASM_CLAC | ||
941 | pushl_cfi $do_invalid_TSS | 964 | pushl_cfi $do_invalid_TSS |
942 | jmp error_code | 965 | jmp error_code |
943 | CFI_ENDPROC | 966 | CFI_ENDPROC |
@@ -945,6 +968,7 @@ END(invalid_TSS) | |||
945 | 968 | ||
946 | ENTRY(segment_not_present) | 969 | ENTRY(segment_not_present) |
947 | RING0_EC_FRAME | 970 | RING0_EC_FRAME |
971 | ASM_CLAC | ||
948 | pushl_cfi $do_segment_not_present | 972 | pushl_cfi $do_segment_not_present |
949 | jmp error_code | 973 | jmp error_code |
950 | CFI_ENDPROC | 974 | CFI_ENDPROC |
@@ -952,6 +976,7 @@ END(segment_not_present) | |||
952 | 976 | ||
953 | ENTRY(stack_segment) | 977 | ENTRY(stack_segment) |
954 | RING0_EC_FRAME | 978 | RING0_EC_FRAME |
979 | ASM_CLAC | ||
955 | pushl_cfi $do_stack_segment | 980 | pushl_cfi $do_stack_segment |
956 | jmp error_code | 981 | jmp error_code |
957 | CFI_ENDPROC | 982 | CFI_ENDPROC |
@@ -959,6 +984,7 @@ END(stack_segment) | |||
959 | 984 | ||
960 | ENTRY(alignment_check) | 985 | ENTRY(alignment_check) |
961 | RING0_EC_FRAME | 986 | RING0_EC_FRAME |
987 | ASM_CLAC | ||
962 | pushl_cfi $do_alignment_check | 988 | pushl_cfi $do_alignment_check |
963 | jmp error_code | 989 | jmp error_code |
964 | CFI_ENDPROC | 990 | CFI_ENDPROC |
@@ -966,6 +992,7 @@ END(alignment_check) | |||
966 | 992 | ||
967 | ENTRY(divide_error) | 993 | ENTRY(divide_error) |
968 | RING0_INT_FRAME | 994 | RING0_INT_FRAME |
995 | ASM_CLAC | ||
969 | pushl_cfi $0 # no error code | 996 | pushl_cfi $0 # no error code |
970 | pushl_cfi $do_divide_error | 997 | pushl_cfi $do_divide_error |
971 | jmp error_code | 998 | jmp error_code |
@@ -975,6 +1002,7 @@ END(divide_error) | |||
975 | #ifdef CONFIG_X86_MCE | 1002 | #ifdef CONFIG_X86_MCE |
976 | ENTRY(machine_check) | 1003 | ENTRY(machine_check) |
977 | RING0_INT_FRAME | 1004 | RING0_INT_FRAME |
1005 | ASM_CLAC | ||
978 | pushl_cfi $0 | 1006 | pushl_cfi $0 |
979 | pushl_cfi machine_check_vector | 1007 | pushl_cfi machine_check_vector |
980 | jmp error_code | 1008 | jmp error_code |
@@ -984,6 +1012,7 @@ END(machine_check) | |||
984 | 1012 | ||
985 | ENTRY(spurious_interrupt_bug) | 1013 | ENTRY(spurious_interrupt_bug) |
986 | RING0_INT_FRAME | 1014 | RING0_INT_FRAME |
1015 | ASM_CLAC | ||
987 | pushl_cfi $0 | 1016 | pushl_cfi $0 |
988 | pushl_cfi $do_spurious_interrupt_bug | 1017 | pushl_cfi $do_spurious_interrupt_bug |
989 | jmp error_code | 1018 | jmp error_code |
@@ -994,16 +1023,6 @@ END(spurious_interrupt_bug) | |||
994 | */ | 1023 | */ |
995 | .popsection | 1024 | .popsection |
996 | 1025 | ||
997 | ENTRY(kernel_thread_helper) | ||
998 | pushl $0 # fake return address for unwinder | ||
999 | CFI_STARTPROC | ||
1000 | movl %edi,%eax | ||
1001 | call *%esi | ||
1002 | call do_exit | ||
1003 | ud2 # padding for call trace | ||
1004 | CFI_ENDPROC | ||
1005 | ENDPROC(kernel_thread_helper) | ||
1006 | |||
1007 | #ifdef CONFIG_XEN | 1026 | #ifdef CONFIG_XEN |
1008 | /* Xen doesn't set %esp to be precisely what the normal sysenter | 1027 | /* Xen doesn't set %esp to be precisely what the normal sysenter |
1009 | entrypoint expects, so fix it up before using the normal path. */ | 1028 | entrypoint expects, so fix it up before using the normal path. */ |
@@ -1111,17 +1130,21 @@ ENTRY(ftrace_caller) | |||
1111 | pushl %eax | 1130 | pushl %eax |
1112 | pushl %ecx | 1131 | pushl %ecx |
1113 | pushl %edx | 1132 | pushl %edx |
1114 | movl 0xc(%esp), %eax | 1133 | pushl $0 /* Pass NULL as regs pointer */ |
1134 | movl 4*4(%esp), %eax | ||
1115 | movl 0x4(%ebp), %edx | 1135 | movl 0x4(%ebp), %edx |
1136 | leal function_trace_op, %ecx | ||
1116 | subl $MCOUNT_INSN_SIZE, %eax | 1137 | subl $MCOUNT_INSN_SIZE, %eax |
1117 | 1138 | ||
1118 | .globl ftrace_call | 1139 | .globl ftrace_call |
1119 | ftrace_call: | 1140 | ftrace_call: |
1120 | call ftrace_stub | 1141 | call ftrace_stub |
1121 | 1142 | ||
1143 | addl $4,%esp /* skip NULL pointer */ | ||
1122 | popl %edx | 1144 | popl %edx |
1123 | popl %ecx | 1145 | popl %ecx |
1124 | popl %eax | 1146 | popl %eax |
1147 | ftrace_ret: | ||
1125 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1148 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1126 | .globl ftrace_graph_call | 1149 | .globl ftrace_graph_call |
1127 | ftrace_graph_call: | 1150 | ftrace_graph_call: |
@@ -1133,6 +1156,71 @@ ftrace_stub: | |||
1133 | ret | 1156 | ret |
1134 | END(ftrace_caller) | 1157 | END(ftrace_caller) |
1135 | 1158 | ||
1159 | ENTRY(ftrace_regs_caller) | ||
1160 | pushf /* push flags before compare (in cs location) */ | ||
1161 | cmpl $0, function_trace_stop | ||
1162 | jne ftrace_restore_flags | ||
1163 | |||
1164 | /* | ||
1165 | * i386 does not save SS and ESP when coming from kernel. | ||
1166 | * Instead, to get sp, &regs->sp is used (see ptrace.h). | ||
1167 | * Unfortunately, that means eflags must be at the same location | ||
1168 | * as the current return ip is. We move the return ip into the | ||
1169 | * ip location, and move flags into the return ip location. | ||
1170 | */ | ||
1171 | pushl 4(%esp) /* save return ip into ip slot */ | ||
1172 | |||
1173 | pushl $0 /* Load 0 into orig_ax */ | ||
1174 | pushl %gs | ||
1175 | pushl %fs | ||
1176 | pushl %es | ||
1177 | pushl %ds | ||
1178 | pushl %eax | ||
1179 | pushl %ebp | ||
1180 | pushl %edi | ||
1181 | pushl %esi | ||
1182 | pushl %edx | ||
1183 | pushl %ecx | ||
1184 | pushl %ebx | ||
1185 | |||
1186 | movl 13*4(%esp), %eax /* Get the saved flags */ | ||
1187 | movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ | ||
1188 | /* clobbering return ip */ | ||
1189 | movl $__KERNEL_CS,13*4(%esp) | ||
1190 | |||
1191 | movl 12*4(%esp), %eax /* Load ip (1st parameter) */ | ||
1192 | subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ | ||
1193 | movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ | ||
1194 | leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ | ||
1195 | pushl %esp /* Save pt_regs as 4th parameter */ | ||
1196 | |||
1197 | GLOBAL(ftrace_regs_call) | ||
1198 | call ftrace_stub | ||
1199 | |||
1200 | addl $4, %esp /* Skip pt_regs */ | ||
1201 | movl 14*4(%esp), %eax /* Move flags back into cs */ | ||
1202 | movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ | ||
1203 | movl 12*4(%esp), %eax /* Get return ip from regs->ip */ | ||
1204 | movl %eax, 14*4(%esp) /* Put return ip back for ret */ | ||
1205 | |||
1206 | popl %ebx | ||
1207 | popl %ecx | ||
1208 | popl %edx | ||
1209 | popl %esi | ||
1210 | popl %edi | ||
1211 | popl %ebp | ||
1212 | popl %eax | ||
1213 | popl %ds | ||
1214 | popl %es | ||
1215 | popl %fs | ||
1216 | popl %gs | ||
1217 | addl $8, %esp /* Skip orig_ax and ip */ | ||
1218 | popf /* Pop flags at end (no addl to corrupt flags) */ | ||
1219 | jmp ftrace_ret | ||
1220 | |||
1221 | ftrace_restore_flags: | ||
1222 | popf | ||
1223 | jmp ftrace_stub | ||
1136 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 1224 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1137 | 1225 | ||
1138 | ENTRY(mcount) | 1226 | ENTRY(mcount) |
@@ -1173,9 +1261,6 @@ END(mcount) | |||
1173 | 1261 | ||
1174 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1262 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1175 | ENTRY(ftrace_graph_caller) | 1263 | ENTRY(ftrace_graph_caller) |
1176 | cmpl $0, function_trace_stop | ||
1177 | jne ftrace_stub | ||
1178 | |||
1179 | pushl %eax | 1264 | pushl %eax |
1180 | pushl %ecx | 1265 | pushl %ecx |
1181 | pushl %edx | 1266 | pushl %edx |
@@ -1209,6 +1294,7 @@ return_to_handler: | |||
1209 | 1294 | ||
1210 | ENTRY(page_fault) | 1295 | ENTRY(page_fault) |
1211 | RING0_EC_FRAME | 1296 | RING0_EC_FRAME |
1297 | ASM_CLAC | ||
1212 | pushl_cfi $do_page_fault | 1298 | pushl_cfi $do_page_fault |
1213 | ALIGN | 1299 | ALIGN |
1214 | error_code: | 1300 | error_code: |
@@ -1281,6 +1367,7 @@ END(page_fault) | |||
1281 | 1367 | ||
1282 | ENTRY(debug) | 1368 | ENTRY(debug) |
1283 | RING0_INT_FRAME | 1369 | RING0_INT_FRAME |
1370 | ASM_CLAC | ||
1284 | cmpl $ia32_sysenter_target,(%esp) | 1371 | cmpl $ia32_sysenter_target,(%esp) |
1285 | jne debug_stack_correct | 1372 | jne debug_stack_correct |
1286 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | 1373 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
@@ -1305,6 +1392,7 @@ END(debug) | |||
1305 | */ | 1392 | */ |
1306 | ENTRY(nmi) | 1393 | ENTRY(nmi) |
1307 | RING0_INT_FRAME | 1394 | RING0_INT_FRAME |
1395 | ASM_CLAC | ||
1308 | pushl_cfi %eax | 1396 | pushl_cfi %eax |
1309 | movl %ss, %eax | 1397 | movl %ss, %eax |
1310 | cmpw $__ESPFIX_SS, %ax | 1398 | cmpw $__ESPFIX_SS, %ax |
@@ -1375,6 +1463,7 @@ END(nmi) | |||
1375 | 1463 | ||
1376 | ENTRY(int3) | 1464 | ENTRY(int3) |
1377 | RING0_INT_FRAME | 1465 | RING0_INT_FRAME |
1466 | ASM_CLAC | ||
1378 | pushl_cfi $-1 # mark this as an int | 1467 | pushl_cfi $-1 # mark this as an int |
1379 | SAVE_ALL | 1468 | SAVE_ALL |
1380 | TRACE_IRQS_OFF | 1469 | TRACE_IRQS_OFF |
@@ -1395,6 +1484,7 @@ END(general_protection) | |||
1395 | #ifdef CONFIG_KVM_GUEST | 1484 | #ifdef CONFIG_KVM_GUEST |
1396 | ENTRY(async_page_fault) | 1485 | ENTRY(async_page_fault) |
1397 | RING0_EC_FRAME | 1486 | RING0_EC_FRAME |
1487 | ASM_CLAC | ||
1398 | pushl_cfi $do_async_page_fault | 1488 | pushl_cfi $do_async_page_fault |
1399 | jmp error_code | 1489 | jmp error_code |
1400 | CFI_ENDPROC | 1490 | CFI_ENDPROC |
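
ret_from_kernel_thread, together with the removal of kernel_thread_helper and PTREGSCALL3(execve), is the 32-bit side of the kernel_thread()/execve rework merged here: the thread function and its argument are stashed in the child's saved EBX and EBP by copy_thread(), get called after schedule_tail(), and the thread then falls through to the ordinary syscall exit instead of ending in do_exit() from a helper, which is what lets a kernel thread exec its way to user space. A C-level model of the child path, with a hypothetical regs type (the argument travels in %eax because the 32-bit kernel is built with -mregparm=3):

struct kt_regs { unsigned long bx, bp, ax; };

/* illustrative only; the real code is the assembly above */
void ret_from_kernel_thread_model(struct kt_regs *regs)
{
        int (*fn)(void *) = (int (*)(void *))regs->bx;
        void *arg = (void *)regs->bp;

        /* schedule_tail() and the eflags reset happen first */
        fn(arg);
        regs->ax = 0;   /* then fall through to syscall_exit */
}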
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index dcdd0ea33a32..b51b2c7ee51f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -56,6 +56,8 @@ | |||
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <asm/asm.h> | 58 | #include <asm/asm.h> |
59 | #include <asm/rcu.h> | ||
60 | #include <asm/smap.h> | ||
59 | #include <linux/err.h> | 61 | #include <linux/err.h> |
60 | 62 | ||
61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 63 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
@@ -68,25 +70,51 @@ | |||
68 | .section .entry.text, "ax" | 70 | .section .entry.text, "ax" |
69 | 71 | ||
70 | #ifdef CONFIG_FUNCTION_TRACER | 72 | #ifdef CONFIG_FUNCTION_TRACER |
73 | |||
74 | #ifdef CC_USING_FENTRY | ||
75 | # define function_hook __fentry__ | ||
76 | #else | ||
77 | # define function_hook mcount | ||
78 | #endif | ||
79 | |||
71 | #ifdef CONFIG_DYNAMIC_FTRACE | 80 | #ifdef CONFIG_DYNAMIC_FTRACE |
72 | ENTRY(mcount) | 81 | |
82 | ENTRY(function_hook) | ||
73 | retq | 83 | retq |
74 | END(mcount) | 84 | END(function_hook) |
85 | |||
86 | /* skip is set if stack has been adjusted */ | ||
87 | .macro ftrace_caller_setup skip=0 | ||
88 | MCOUNT_SAVE_FRAME \skip | ||
89 | |||
90 | /* Load the ftrace_ops into the 3rd parameter */ | ||
91 | leaq function_trace_op, %rdx | ||
92 | |||
93 | /* Load ip into the first parameter */ | ||
94 | movq RIP(%rsp), %rdi | ||
95 | subq $MCOUNT_INSN_SIZE, %rdi | ||
96 | /* Load the parent_ip into the second parameter */ | ||
97 | #ifdef CC_USING_FENTRY | ||
98 | movq SS+16(%rsp), %rsi | ||
99 | #else | ||
100 | movq 8(%rbp), %rsi | ||
101 | #endif | ||
102 | .endm | ||
75 | 103 | ||
76 | ENTRY(ftrace_caller) | 104 | ENTRY(ftrace_caller) |
105 | /* Check if tracing was disabled (quick check) */ | ||
77 | cmpl $0, function_trace_stop | 106 | cmpl $0, function_trace_stop |
78 | jne ftrace_stub | 107 | jne ftrace_stub |
79 | 108 | ||
80 | MCOUNT_SAVE_FRAME | 109 | ftrace_caller_setup |
81 | 110 | /* regs go into 4th parameter (but make it NULL) */ | |
82 | movq 0x38(%rsp), %rdi | 111 | movq $0, %rcx |
83 | movq 8(%rbp), %rsi | ||
84 | subq $MCOUNT_INSN_SIZE, %rdi | ||
85 | 112 | ||
86 | GLOBAL(ftrace_call) | 113 | GLOBAL(ftrace_call) |
87 | call ftrace_stub | 114 | call ftrace_stub |
88 | 115 | ||
89 | MCOUNT_RESTORE_FRAME | 116 | MCOUNT_RESTORE_FRAME |
117 | ftrace_return: | ||
90 | 118 | ||
91 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 119 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
92 | GLOBAL(ftrace_graph_call) | 120 | GLOBAL(ftrace_graph_call) |
@@ -97,8 +125,78 @@ GLOBAL(ftrace_stub) | |||
97 | retq | 125 | retq |
98 | END(ftrace_caller) | 126 | END(ftrace_caller) |
99 | 127 | ||
128 | ENTRY(ftrace_regs_caller) | ||
129 | /* Save the current flags before compare (in SS location) */ | ||
130 | pushfq | ||
131 | |||
132 | /* Check if tracing was disabled (quick check) */ | ||
133 | cmpl $0, function_trace_stop | ||
134 | jne ftrace_restore_flags | ||
135 | |||
136 | /* skip=8 to skip flags saved in SS */ | ||
137 | ftrace_caller_setup 8 | ||
138 | |||
139 | /* Save the rest of pt_regs */ | ||
140 | movq %r15, R15(%rsp) | ||
141 | movq %r14, R14(%rsp) | ||
142 | movq %r13, R13(%rsp) | ||
143 | movq %r12, R12(%rsp) | ||
144 | movq %r11, R11(%rsp) | ||
145 | movq %r10, R10(%rsp) | ||
146 | movq %rbp, RBP(%rsp) | ||
147 | movq %rbx, RBX(%rsp) | ||
148 | /* Copy saved flags */ | ||
149 | movq SS(%rsp), %rcx | ||
150 | movq %rcx, EFLAGS(%rsp) | ||
151 | /* Kernel segments */ | ||
152 | movq $__KERNEL_DS, %rcx | ||
153 | movq %rcx, SS(%rsp) | ||
154 | movq $__KERNEL_CS, %rcx | ||
155 | movq %rcx, CS(%rsp) | ||
156 | /* Stack - skipping return address */ | ||
157 | leaq SS+16(%rsp), %rcx | ||
158 | movq %rcx, RSP(%rsp) | ||
159 | |||
160 | /* regs go into 4th parameter */ | ||
161 | leaq (%rsp), %rcx | ||
162 | |||
163 | GLOBAL(ftrace_regs_call) | ||
164 | call ftrace_stub | ||
165 | |||
166 | /* Copy flags back to SS, to restore them */ | ||
167 | movq EFLAGS(%rsp), %rax | ||
168 | movq %rax, SS(%rsp) | ||
169 | |||
170 | /* Handlers can change the RIP */ | ||
171 | movq RIP(%rsp), %rax | ||
172 | movq %rax, SS+8(%rsp) | ||
173 | |||
174 | /* restore the rest of pt_regs */ | ||
175 | movq R15(%rsp), %r15 | ||
176 | movq R14(%rsp), %r14 | ||
177 | movq R13(%rsp), %r13 | ||
178 | movq R12(%rsp), %r12 | ||
179 | movq R10(%rsp), %r10 | ||
180 | movq RBP(%rsp), %rbp | ||
181 | movq RBX(%rsp), %rbx | ||
182 | |||
183 | /* skip=8 to skip flags saved in SS */ | ||
184 | MCOUNT_RESTORE_FRAME 8 | ||
185 | |||
186 | /* Restore flags */ | ||
187 | popfq | ||
188 | |||
189 | jmp ftrace_return | ||
190 | ftrace_restore_flags: | ||
191 | popfq | ||
192 | jmp ftrace_stub | ||
193 | |||
194 | END(ftrace_regs_caller) | ||
195 | |||
196 | |||
100 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 197 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
101 | ENTRY(mcount) | 198 | |
199 | ENTRY(function_hook) | ||
102 | cmpl $0, function_trace_stop | 200 | cmpl $0, function_trace_stop |
103 | jne ftrace_stub | 201 | jne ftrace_stub |
104 | 202 | ||
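ftrace_regs_caller above fills in a complete pt_regs and passes it to the callback as the fourth parameter, which is what allows a handler to inspect, or even rewrite, the saved registers and return RIP. A hedged usage sketch: the four-argument callback prototype and the FTRACE_OPS_FL_SAVE_REGS flag are part of the ftrace changes merged here, while the function names below are invented.

    /* A callback that asks to be routed through ftrace_regs_caller. */
    static void my_regs_callback(unsigned long ip, unsigned long parent_ip,
                                 struct ftrace_ops *op, struct pt_regs *regs)
    {
            /* regs is a full snapshot; a handler may even rewrite regs->ip */
            if (regs)
                    pr_info("traced %pS, ax=%lx\n", (void *)ip, regs->ax);
    }

    static struct ftrace_ops my_ops = {
            .func  = my_regs_callback,
            .flags = FTRACE_OPS_FL_SAVE_REGS,
    };
    /* register_ftrace_function(&my_ops) hooks it in. */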
@@ -119,8 +217,12 @@ GLOBAL(ftrace_stub) | |||
119 | trace: | 217 | trace: |
120 | MCOUNT_SAVE_FRAME | 218 | MCOUNT_SAVE_FRAME |
121 | 219 | ||
122 | movq 0x38(%rsp), %rdi | 220 | movq RIP(%rsp), %rdi |
221 | #ifdef CC_USING_FENTRY | ||
222 | movq SS+16(%rsp), %rsi | ||
223 | #else | ||
123 | movq 8(%rbp), %rsi | 224 | movq 8(%rbp), %rsi |
225 | #endif | ||
124 | subq $MCOUNT_INSN_SIZE, %rdi | 226 | subq $MCOUNT_INSN_SIZE, %rdi |
125 | 227 | ||
126 | call *ftrace_trace_function | 228 | call *ftrace_trace_function |
@@ -128,20 +230,22 @@ trace: | |||
128 | MCOUNT_RESTORE_FRAME | 230 | MCOUNT_RESTORE_FRAME |
129 | 231 | ||
130 | jmp ftrace_stub | 232 | jmp ftrace_stub |
131 | END(mcount) | 233 | END(function_hook) |
132 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 234 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
133 | #endif /* CONFIG_FUNCTION_TRACER */ | 235 | #endif /* CONFIG_FUNCTION_TRACER */ |
134 | 236 | ||
135 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 237 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
136 | ENTRY(ftrace_graph_caller) | 238 | ENTRY(ftrace_graph_caller) |
137 | cmpl $0, function_trace_stop | ||
138 | jne ftrace_stub | ||
139 | |||
140 | MCOUNT_SAVE_FRAME | 239 | MCOUNT_SAVE_FRAME |
141 | 240 | ||
241 | #ifdef CC_USING_FENTRY | ||
242 | leaq SS+16(%rsp), %rdi | ||
243 | movq $0, %rdx /* No framepointers needed */ | ||
244 | #else | ||
142 | leaq 8(%rbp), %rdi | 245 | leaq 8(%rbp), %rdi |
143 | movq 0x38(%rsp), %rsi | ||
144 | movq (%rbp), %rdx | 246 | movq (%rbp), %rdx |
247 | #endif | ||
248 | movq RIP(%rsp), %rsi | ||
145 | subq $MCOUNT_INSN_SIZE, %rsi | 249 | subq $MCOUNT_INSN_SIZE, %rsi |
146 | 250 | ||
147 | call prepare_ftrace_return | 251 | call prepare_ftrace_return |
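The CC_USING_FENTRY branches exist because gcc's -mfentry emits the instrumentation call before the function prologue, so there is no frame pointer to chase yet; the parent IP is simply the return address the CPU pushed at the call site, found at SS+16(%rsp) once MCOUNT_SAVE_FRAME has run. A compiler's-eye illustration (not taken from the patch):

    /*
     * What the compiler emits for a traced function:
     *
     *   gcc -pg:                  gcc -pg -mfentry (CC_USING_FENTRY):
     *     push %rbp                 call __fentry__   <- very first insn
     *     mov  %rsp,%rbp            push %rbp
     *     call mcount               mov  %rsp,%rbp
     *     ...                       ...
     *
     * With plain -pg the parent IP is reachable at 8(%rbp); with
     * -mfentry the frame is not set up yet, so it still sits where the
     * caller's call instruction pushed it on the stack.
     */
    void traced_function(void) { /* body */ }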
@@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64) | |||
342 | .macro SAVE_ARGS_IRQ | 446 | .macro SAVE_ARGS_IRQ |
343 | cld | 447 | cld |
344 | /* start from rbp in pt_regs and jump over */ | 448 | /* start from rbp in pt_regs and jump over */ |
345 | movq_cfi rdi, RDI-RBP | 449 | movq_cfi rdi, (RDI-RBP) |
346 | movq_cfi rsi, RSI-RBP | 450 | movq_cfi rsi, (RSI-RBP) |
347 | movq_cfi rdx, RDX-RBP | 451 | movq_cfi rdx, (RDX-RBP) |
348 | movq_cfi rcx, RCX-RBP | 452 | movq_cfi rcx, (RCX-RBP) |
349 | movq_cfi rax, RAX-RBP | 453 | movq_cfi rax, (RAX-RBP) |
350 | movq_cfi r8, R8-RBP | 454 | movq_cfi r8, (R8-RBP) |
351 | movq_cfi r9, R9-RBP | 455 | movq_cfi r9, (R9-RBP) |
352 | movq_cfi r10, R10-RBP | 456 | movq_cfi r10, (R10-RBP) |
353 | movq_cfi r11, R11-RBP | 457 | movq_cfi r11, (R11-RBP) |
354 | 458 | ||
355 | /* Save rbp so that we can unwind from get_irq_regs() */ | 459 | /* Save rbp so that we can unwind from get_irq_regs() */ |
356 | movq_cfi rbp, 0 | 460 | movq_cfi rbp, 0 |
@@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64) | |||
384 | .endm | 488 | .endm |
385 | 489 | ||
386 | ENTRY(save_rest) | 490 | ENTRY(save_rest) |
387 | PARTIAL_FRAME 1 REST_SKIP+8 | 491 | PARTIAL_FRAME 1 (REST_SKIP+8) |
388 | movq 5*8+16(%rsp), %r11 /* save return address */ | 492 | movq 5*8+16(%rsp), %r11 /* save return address */ |
389 | movq_cfi rbx, RBX+16 | 493 | movq_cfi rbx, RBX+16 |
390 | movq_cfi rbp, RBP+16 | 494 | movq_cfi rbp, RBP+16 |
@@ -440,7 +544,7 @@ ENTRY(ret_from_fork) | |||
440 | 544 | ||
441 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | 545 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
442 | 546 | ||
443 | pushq_cfi kernel_eflags(%rip) | 547 | pushq_cfi $0x0002 |
444 | popfq_cfi # reset kernel eflags | 548 | popfq_cfi # reset kernel eflags |
445 | 549 | ||
446 | call schedule_tail # rdi: 'prev' task parameter | 550 | call schedule_tail # rdi: 'prev' task parameter |
@@ -450,7 +554,7 @@ ENTRY(ret_from_fork) | |||
450 | RESTORE_REST | 554 | RESTORE_REST |
451 | 555 | ||
452 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 556 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
453 | jz retint_restore_args | 557 | jz 1f |
454 | 558 | ||
455 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 559 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
456 | jnz int_ret_from_sys_call | 560 | jnz int_ret_from_sys_call |
@@ -458,6 +562,14 @@ ENTRY(ret_from_fork) | |||
458 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET | 562 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
459 | jmp ret_from_sys_call # go to the SYSRET fastpath | 563 | jmp ret_from_sys_call # go to the SYSRET fastpath |
460 | 564 | ||
565 | 1: | ||
566 | subq $REST_SKIP, %rsp # leave space for volatiles | ||
567 | CFI_ADJUST_CFA_OFFSET REST_SKIP | ||
568 | movq %rbp, %rdi | ||
569 | call *%rbx | ||
570 | movl $0, RAX(%rsp) | ||
571 | RESTORE_REST | ||
572 | jmp int_ret_from_sys_call | ||
461 | CFI_ENDPROC | 573 | CFI_ENDPROC |
462 | END(ret_from_fork) | 574 | END(ret_from_fork) |
463 | 575 | ||
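The new "1:" branch is the landing pad for kernel threads now that kernel_thread_helper and the custom kernel_execve (both deleted further down) are gone: copy_thread() parks the thread function in %rbx and its argument in %rbp, and the common fork return simply calls it. In rough C terms, with invented helper names:

    /*
     * C-level analogy of the asm above; kernel_thread_tail() and
     * return_via_syscall_exit() are hypothetical names for illustration.
     */
    static void kernel_thread_tail(int (*fn)(void *), void *arg,
                                   struct pt_regs *regs)
    {
            fn(arg);           /* "call *%rbx", with %rbp as the argument */
            regs->ax = 0;      /* "movl $0, RAX(%rsp)" */
            /*
             * If fn() returns, it exec'ed a user process; leave through
             * the ordinary syscall exit so the new image starts from a
             * clean pt_regs.
             */
            return_via_syscall_exit();   /* "jmp int_ret_from_sys_call" */
    }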
@@ -465,7 +577,8 @@ END(ret_from_fork) | |||
465 | * System call entry. Up to 6 arguments in registers are supported. | 577 | * System call entry. Up to 6 arguments in registers are supported. |
466 | * | 578 | * |
467 | * SYSCALL does not save anything on the stack and does not change the | 579 | * SYSCALL does not save anything on the stack and does not change the |
468 | * stack pointer. | 580 | * stack pointer. However, it does mask the flags register for us, so |
581 | * CLD and CLAC are not needed. | ||
469 | */ | 582 | */ |
470 | 583 | ||
471 | /* | 584 | /* |
@@ -565,7 +678,7 @@ sysret_careful: | |||
565 | TRACE_IRQS_ON | 678 | TRACE_IRQS_ON |
566 | ENABLE_INTERRUPTS(CLBR_NONE) | 679 | ENABLE_INTERRUPTS(CLBR_NONE) |
567 | pushq_cfi %rdi | 680 | pushq_cfi %rdi |
568 | call schedule | 681 | SCHEDULE_USER |
569 | popq_cfi %rdi | 682 | popq_cfi %rdi |
570 | jmp sysret_check | 683 | jmp sysret_check |
571 | 684 | ||
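SCHEDULE_USER, pulled in via the new asm/rcu.h include, replaces the bare "call schedule" on every return-to-user path because of the adaptive-tickless RCU work: with CONFIG_RCU_USER_QS, a CPU headed back to userspace sits in an RCU extended quiescent state and has to notify RCU before scheduling. A sketch of the wrapper it resolves to under that option (treat the exact definition as an assumption):

    /* The scheduler-side half of SCHEDULE_USER, approximately: */
    asmlinkage void schedule_user(void)
    {
            rcu_user_exit();    /* leave the userspace extended QS */
            schedule();
            rcu_user_enter();   /* re-enter it before returning to user */
    }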
@@ -678,7 +791,7 @@ int_careful: | |||
678 | TRACE_IRQS_ON | 791 | TRACE_IRQS_ON |
679 | ENABLE_INTERRUPTS(CLBR_NONE) | 792 | ENABLE_INTERRUPTS(CLBR_NONE) |
680 | pushq_cfi %rdi | 793 | pushq_cfi %rdi |
681 | call schedule | 794 | SCHEDULE_USER |
682 | popq_cfi %rdi | 795 | popq_cfi %rdi |
683 | DISABLE_INTERRUPTS(CLBR_NONE) | 796 | DISABLE_INTERRUPTS(CLBR_NONE) |
684 | TRACE_IRQS_OFF | 797 | TRACE_IRQS_OFF |
@@ -757,7 +870,6 @@ ENTRY(stub_execve) | |||
757 | PARTIAL_FRAME 0 | 870 | PARTIAL_FRAME 0 |
758 | SAVE_REST | 871 | SAVE_REST |
759 | FIXUP_TOP_OF_STACK %r11 | 872 | FIXUP_TOP_OF_STACK %r11 |
760 | movq %rsp, %rcx | ||
761 | call sys_execve | 873 | call sys_execve |
762 | RESTORE_TOP_OF_STACK %r11 | 874 | RESTORE_TOP_OF_STACK %r11 |
763 | movq %rax,RAX(%rsp) | 875 | movq %rax,RAX(%rsp) |
@@ -807,8 +919,7 @@ ENTRY(stub_x32_execve) | |||
807 | PARTIAL_FRAME 0 | 919 | PARTIAL_FRAME 0 |
808 | SAVE_REST | 920 | SAVE_REST |
809 | FIXUP_TOP_OF_STACK %r11 | 921 | FIXUP_TOP_OF_STACK %r11 |
810 | movq %rsp, %rcx | 922 | call compat_sys_execve |
811 | call sys32_execve | ||
812 | RESTORE_TOP_OF_STACK %r11 | 923 | RESTORE_TOP_OF_STACK %r11 |
813 | movq %rax,RAX(%rsp) | 924 | movq %rax,RAX(%rsp) |
814 | RESTORE_REST | 925 | RESTORE_REST |
@@ -884,6 +995,7 @@ END(interrupt) | |||
884 | */ | 995 | */ |
885 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 996 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
886 | common_interrupt: | 997 | common_interrupt: |
998 | ASM_CLAC | ||
887 | XCPT_FRAME | 999 | XCPT_FRAME |
888 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 1000 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
889 | interrupt do_IRQ | 1001 | interrupt do_IRQ |
@@ -974,7 +1086,7 @@ retint_careful: | |||
974 | TRACE_IRQS_ON | 1086 | TRACE_IRQS_ON |
975 | ENABLE_INTERRUPTS(CLBR_NONE) | 1087 | ENABLE_INTERRUPTS(CLBR_NONE) |
976 | pushq_cfi %rdi | 1088 | pushq_cfi %rdi |
977 | call schedule | 1089 | SCHEDULE_USER |
978 | popq_cfi %rdi | 1090 | popq_cfi %rdi |
979 | GET_THREAD_INFO(%rcx) | 1091 | GET_THREAD_INFO(%rcx) |
980 | DISABLE_INTERRUPTS(CLBR_NONE) | 1092 | DISABLE_INTERRUPTS(CLBR_NONE) |
@@ -1023,6 +1135,7 @@ END(common_interrupt) | |||
1023 | */ | 1135 | */ |
1024 | .macro apicinterrupt num sym do_sym | 1136 | .macro apicinterrupt num sym do_sym |
1025 | ENTRY(\sym) | 1137 | ENTRY(\sym) |
1138 | ASM_CLAC | ||
1026 | INTR_FRAME | 1139 | INTR_FRAME |
1027 | pushq_cfi $~(\num) | 1140 | pushq_cfi $~(\num) |
1028 | .Lcommon_\sym: | 1141 | .Lcommon_\sym: |
@@ -1077,6 +1190,7 @@ apicinterrupt IRQ_WORK_VECTOR \ | |||
1077 | */ | 1190 | */ |
1078 | .macro zeroentry sym do_sym | 1191 | .macro zeroentry sym do_sym |
1079 | ENTRY(\sym) | 1192 | ENTRY(\sym) |
1193 | ASM_CLAC | ||
1080 | INTR_FRAME | 1194 | INTR_FRAME |
1081 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1195 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1082 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1196 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
@@ -1094,6 +1208,7 @@ END(\sym) | |||
1094 | 1208 | ||
1095 | .macro paranoidzeroentry sym do_sym | 1209 | .macro paranoidzeroentry sym do_sym |
1096 | ENTRY(\sym) | 1210 | ENTRY(\sym) |
1211 | ASM_CLAC | ||
1097 | INTR_FRAME | 1212 | INTR_FRAME |
1098 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1213 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1099 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1214 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
@@ -1112,6 +1227,7 @@ END(\sym) | |||
1112 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | 1227 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) |
1113 | .macro paranoidzeroentry_ist sym do_sym ist | 1228 | .macro paranoidzeroentry_ist sym do_sym ist |
1114 | ENTRY(\sym) | 1229 | ENTRY(\sym) |
1230 | ASM_CLAC | ||
1115 | INTR_FRAME | 1231 | INTR_FRAME |
1116 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1232 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1117 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1233 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
@@ -1131,6 +1247,7 @@ END(\sym) | |||
1131 | 1247 | ||
1132 | .macro errorentry sym do_sym | 1248 | .macro errorentry sym do_sym |
1133 | ENTRY(\sym) | 1249 | ENTRY(\sym) |
1250 | ASM_CLAC | ||
1134 | XCPT_FRAME | 1251 | XCPT_FRAME |
1135 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1252 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1136 | subq $ORIG_RAX-R15, %rsp | 1253 | subq $ORIG_RAX-R15, %rsp |
@@ -1149,6 +1266,7 @@ END(\sym) | |||
1149 | /* error code is on the stack already */ | 1266 | /* error code is on the stack already */ |
1150 | .macro paranoiderrorentry sym do_sym | 1267 | .macro paranoiderrorentry sym do_sym |
1151 | ENTRY(\sym) | 1268 | ENTRY(\sym) |
1269 | ASM_CLAC | ||
1152 | XCPT_FRAME | 1270 | XCPT_FRAME |
1153 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1271 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1154 | subq $ORIG_RAX-R15, %rsp | 1272 | subq $ORIG_RAX-R15, %rsp |
@@ -1206,52 +1324,6 @@ bad_gs: | |||
1206 | jmp 2b | 1324 | jmp 2b |
1207 | .previous | 1325 | .previous |
1208 | 1326 | ||
1209 | ENTRY(kernel_thread_helper) | ||
1210 | pushq $0 # fake return address | ||
1211 | CFI_STARTPROC | ||
1212 | /* | ||
1213 | * Here we are in the child and the registers are set as they were | ||
1214 | * at kernel_thread() invocation in the parent. | ||
1215 | */ | ||
1216 | call *%rsi | ||
1217 | # exit | ||
1218 | mov %eax, %edi | ||
1219 | call do_exit | ||
1220 | ud2 # padding for call trace | ||
1221 | CFI_ENDPROC | ||
1222 | END(kernel_thread_helper) | ||
1223 | |||
1224 | /* | ||
1225 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | ||
1226 | * | ||
1227 | * C extern interface: | ||
1228 | * extern long execve(const char *name, char **argv, char **envp) | ||
1229 | * | ||
1230 | * asm input arguments: | ||
1231 | * rdi: name, rsi: argv, rdx: envp | ||
1232 | * | ||
1233 | * We want to fall back into: | ||
1234 | * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) | ||
1235 | * | ||
1236 | * do_sys_execve asm fallback arguments: | ||
1237 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack | ||
1238 | */ | ||
1239 | ENTRY(kernel_execve) | ||
1240 | CFI_STARTPROC | ||
1241 | FAKE_STACK_FRAME $0 | ||
1242 | SAVE_ALL | ||
1243 | movq %rsp,%rcx | ||
1244 | call sys_execve | ||
1245 | movq %rax, RAX(%rsp) | ||
1246 | RESTORE_REST | ||
1247 | testq %rax,%rax | ||
1248 | je int_ret_from_sys_call | ||
1249 | RESTORE_ARGS | ||
1250 | UNFAKE_STACK_FRAME | ||
1251 | ret | ||
1252 | CFI_ENDPROC | ||
1253 | END(kernel_execve) | ||
1254 | |||
1255 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1327 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1256 | ENTRY(call_softirq) | 1328 | ENTRY(call_softirq) |
1257 | CFI_STARTPROC | 1329 | CFI_STARTPROC |
@@ -1449,7 +1521,7 @@ paranoid_userspace: | |||
1449 | paranoid_schedule: | 1521 | paranoid_schedule: |
1450 | TRACE_IRQS_ON | 1522 | TRACE_IRQS_ON |
1451 | ENABLE_INTERRUPTS(CLBR_ANY) | 1523 | ENABLE_INTERRUPTS(CLBR_ANY) |
1452 | call schedule | 1524 | SCHEDULE_USER |
1453 | DISABLE_INTERRUPTS(CLBR_ANY) | 1525 | DISABLE_INTERRUPTS(CLBR_ANY) |
1454 | TRACE_IRQS_OFF | 1526 | TRACE_IRQS_OFF |
1455 | jmp paranoid_userspace | 1527 | jmp paranoid_userspace |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e6..1d414029f1d8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -206,6 +206,21 @@ static int | |||
206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, |
207 | unsigned const char *new_code); | 207 | unsigned const char *new_code); |
208 | 208 | ||
209 | /* | ||
210 | * Should never be called: | ||
211 | * It is only called by __ftrace_replace_code(), which is called by | ||
212 | * ftrace_replace_code(), which x86 overrides, and by ftrace_update_code(), | ||
213 | * which is called to turn mcount into nops or nops into function calls, | ||
214 | * but not to convert a function from not using regs to one that uses | ||
215 | * regs, which is what ftrace_modify_call() is for. | ||
216 | */ | ||
217 | int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, | ||
218 | unsigned long addr) | ||
219 | { | ||
220 | WARN_ON(1); | ||
221 | return -EINVAL; | ||
222 | } | ||
223 | |||
209 | int ftrace_update_ftrace_func(ftrace_func_t func) | 224 | int ftrace_update_ftrace_func(ftrace_func_t func) |
210 | { | 225 | { |
211 | unsigned long ip = (unsigned long)(&ftrace_call); | 226 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
220 | 235 | ||
221 | ret = ftrace_modify_code(ip, old, new); | 236 | ret = ftrace_modify_code(ip, old, new); |
222 | 237 | ||
238 | /* Also update the regs callback function */ | ||
239 | if (!ret) { | ||
240 | ip = (unsigned long)(&ftrace_regs_call); | ||
241 | memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); | ||
242 | new = ftrace_call_replace(ip, (unsigned long)func); | ||
243 | ret = ftrace_modify_code(ip, old, new); | ||
244 | } | ||
245 | |||
223 | atomic_dec(&modifying_ftrace_code); | 246 | atomic_dec(&modifying_ftrace_code); |
224 | 247 | ||
225 | return ret; | 248 | return ret; |
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) | |||
299 | return add_break(rec->ip, old); | 322 | return add_break(rec->ip, old); |
300 | } | 323 | } |
301 | 324 | ||
325 | /* | ||
326 | * If the record has FTRACE_FL_REGS set, that means it wants to | ||
327 | * convert to a callback that saves all regs. If FTRACE_FL_REGS is | ||
328 | * not set, then it wants to convert to the normal callback. | ||
329 | */ | ||
330 | static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) | ||
331 | { | ||
332 | if (rec->flags & FTRACE_FL_REGS) | ||
333 | return (unsigned long)FTRACE_REGS_ADDR; | ||
334 | else | ||
335 | return (unsigned long)FTRACE_ADDR; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * FTRACE_FL_REGS_EN is set when the record already points to | ||
340 | * a function that saves all the regs. Basically the '_EN' version | ||
341 | * represents the current state of the function. | ||
342 | */ | ||
343 | static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) | ||
344 | { | ||
345 | if (rec->flags & FTRACE_FL_REGS_EN) | ||
346 | return (unsigned long)FTRACE_REGS_ADDR; | ||
347 | else | ||
348 | return (unsigned long)FTRACE_ADDR; | ||
349 | } | ||
350 | |||
302 | static int add_breakpoints(struct dyn_ftrace *rec, int enable) | 351 | static int add_breakpoints(struct dyn_ftrace *rec, int enable) |
303 | { | 352 | { |
304 | unsigned long ftrace_addr; | 353 | unsigned long ftrace_addr; |
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) | |||
306 | 355 | ||
307 | ret = ftrace_test_record(rec, enable); | 356 | ret = ftrace_test_record(rec, enable); |
308 | 357 | ||
309 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 358 | ftrace_addr = get_ftrace_addr(rec); |
310 | 359 | ||
311 | switch (ret) { | 360 | switch (ret) { |
312 | case FTRACE_UPDATE_IGNORE: | 361 | case FTRACE_UPDATE_IGNORE: |
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) | |||
316 | /* converting nop to call */ | 365 | /* converting nop to call */ |
317 | return add_brk_on_nop(rec); | 366 | return add_brk_on_nop(rec); |
318 | 367 | ||
368 | case FTRACE_UPDATE_MODIFY_CALL_REGS: | ||
369 | case FTRACE_UPDATE_MODIFY_CALL: | ||
370 | ftrace_addr = get_ftrace_old_addr(rec); | ||
371 | /* fall through */ | ||
319 | case FTRACE_UPDATE_MAKE_NOP: | 372 | case FTRACE_UPDATE_MAKE_NOP: |
320 | /* converting a call to a nop */ | 373 | /* converting a call to a nop */ |
321 | return add_brk_on_call(rec, ftrace_addr); | 374 | return add_brk_on_call(rec, ftrace_addr); |
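The get_ftrace_addr()/get_ftrace_old_addr() pair is easy to read backwards: FTRACE_FL_REGS records what the call site should become, FTRACE_FL_REGS_EN what it currently is. A self-contained model of that selection logic (the flag bits and addresses below are made-up stand-ins):

    #include <stdio.h>

    #define FL_REGS    (1u << 0)    /* wants the regs-saving caller */
    #define FL_REGS_EN (1u << 1)    /* currently uses it */

    enum { ADDR_PLAIN = 0x1000, ADDR_REGS = 0x2000 };

    static unsigned long new_addr(unsigned f) { return (f & FL_REGS)    ? ADDR_REGS : ADDR_PLAIN; }
    static unsigned long old_addr(unsigned f) { return (f & FL_REGS_EN) ? ADDR_REGS : ADDR_PLAIN; }

    int main(void)
    {
            /* a record being upgraded from the plain to the regs caller */
            unsigned flags = FL_REGS;     /* wanted, but not yet enabled */

            printf("patch call %#lx -> %#lx\n", old_addr(flags), new_addr(flags));
            return 0;
    }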
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) | |||
360 | * If not, don't touch the breakpoint, we may just create | 413 | * If not, don't touch the breakpoint, we may just create |
361 | * a disaster. | 414 | * a disaster. |
362 | */ | 415 | */ |
363 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 416 | ftrace_addr = get_ftrace_addr(rec); |
417 | nop = ftrace_call_replace(ip, ftrace_addr); | ||
418 | |||
419 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) | ||
420 | goto update; | ||
421 | |||
422 | /* Check both ftrace_addr and ftrace_old_addr */ | ||
423 | ftrace_addr = get_ftrace_old_addr(rec); | ||
364 | nop = ftrace_call_replace(ip, ftrace_addr); | 424 | nop = ftrace_call_replace(ip, ftrace_addr); |
365 | 425 | ||
366 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) | 426 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) |
367 | return -EINVAL; | 427 | return -EINVAL; |
368 | } | 428 | } |
369 | 429 | ||
430 | update: | ||
370 | return probe_kernel_write((void *)ip, &nop[0], 1); | 431 | return probe_kernel_write((void *)ip, &nop[0], 1); |
371 | } | 432 | } |
372 | 433 | ||
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) | |||
405 | 466 | ||
406 | ret = ftrace_test_record(rec, enable); | 467 | ret = ftrace_test_record(rec, enable); |
407 | 468 | ||
408 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 469 | ftrace_addr = get_ftrace_addr(rec); |
409 | 470 | ||
410 | switch (ret) { | 471 | switch (ret) { |
411 | case FTRACE_UPDATE_IGNORE: | 472 | case FTRACE_UPDATE_IGNORE: |
412 | return 0; | 473 | return 0; |
413 | 474 | ||
475 | case FTRACE_UPDATE_MODIFY_CALL_REGS: | ||
476 | case FTRACE_UPDATE_MODIFY_CALL: | ||
414 | case FTRACE_UPDATE_MAKE_CALL: | 477 | case FTRACE_UPDATE_MAKE_CALL: |
415 | /* converting nop to call */ | 478 | /* converting nop to call */ |
416 | return add_update_call(rec, ftrace_addr); | 479 | return add_update_call(rec, ftrace_addr); |
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) | |||
455 | 518 | ||
456 | ret = ftrace_update_record(rec, enable); | 519 | ret = ftrace_update_record(rec, enable); |
457 | 520 | ||
458 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 521 | ftrace_addr = get_ftrace_addr(rec); |
459 | 522 | ||
460 | switch (ret) { | 523 | switch (ret) { |
461 | case FTRACE_UPDATE_IGNORE: | 524 | case FTRACE_UPDATE_IGNORE: |
462 | return 0; | 525 | return 0; |
463 | 526 | ||
527 | case FTRACE_UPDATE_MODIFY_CALL_REGS: | ||
528 | case FTRACE_UPDATE_MODIFY_CALL: | ||
464 | case FTRACE_UPDATE_MAKE_CALL: | 529 | case FTRACE_UPDATE_MAKE_CALL: |
465 | /* converting nop to call */ | 530 | /* converting nop to call */ |
466 | return finish_update_call(rec, ftrace_addr); | 531 | return finish_update_call(rec, ftrace_addr); |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d42ab17b7397..957a47aec64e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -287,27 +287,28 @@ ENTRY(startup_32_smp) | |||
287 | leal -__PAGE_OFFSET(%ecx),%esp | 287 | leal -__PAGE_OFFSET(%ecx),%esp |
288 | 288 | ||
289 | default_entry: | 289 | default_entry: |
290 | |||
291 | /* | 290 | /* |
292 | * New page tables may be in 4Mbyte page mode and may | 291 | * New page tables may be in 4Mbyte page mode and may |
293 | * be using the global pages. | 292 | * be using the global pages. |
294 | * | 293 | * |
295 | * NOTE! If we are on a 486 we may have no cr4 at all! | 294 | * NOTE! If we are on a 486 we may have no cr4 at all! |
296 | * So we do not try to touch it unless we really have | 295 | * Specifically, cr4 exists if and only if CPUID exists, |
297 | * some bits in it to set. This won't work if the BSP | 296 | * which in turn exists if and only if EFLAGS.ID exists. |
298 | * implements cr4 but this AP does not -- very unlikely | ||
299 | * but be warned! The same applies to the pse feature | ||
300 | * if not equally supported. --macro | ||
301 | * | ||
302 | * NOTE! We have to correct for the fact that we're | ||
303 | * not yet offset PAGE_OFFSET.. | ||
304 | */ | 297 | */ |
305 | #define cr4_bits pa(mmu_cr4_features) | 298 | movl $X86_EFLAGS_ID,%ecx |
306 | movl cr4_bits,%edx | 299 | pushl %ecx |
307 | andl %edx,%edx | 300 | popfl |
308 | jz 6f | 301 | pushfl |
309 | movl %cr4,%eax # Turn on paging options (PSE,PAE,..) | 302 | popl %eax |
310 | orl %edx,%eax | 303 | pushl $0 |
304 | popfl | ||
305 | pushfl | ||
306 | popl %edx | ||
307 | xorl %edx,%eax | ||
308 | testl %ecx,%eax | ||
309 | jz 6f # No ID flag = no CPUID = no CR4 | ||
310 | |||
311 | movl pa(mmu_cr4_features),%eax | ||
311 | movl %eax,%cr4 | 312 | movl %eax,%cr4 |
312 | 313 | ||
313 | testb $X86_CR4_PAE, %al # check if PAE is enabled | 314 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
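The rewritten default_entry logic is the classic EFLAGS.ID probe: bit 21 of EFLAGS can be toggled exactly on CPUs that implement CPUID, and CPUID existing implies CR4 exists, so a 486 lacking both is detected without ever touching CR4. The same probe as a runnable 32-bit userspace program (GCC inline asm, illustration only):

    #include <stdio.h>

    static int has_cpuid(void)
    {
            unsigned long before, after;

            asm volatile("pushfl\n\t"        /* save the original EFLAGS */
                         "pushfl\n\t"
                         "popl %0\n\t"       /* before = EFLAGS           */
                         "movl %0, %1\n\t"
                         "xorl %2, %1\n\t"   /* flip the ID bit           */
                         "pushl %1\n\t"
                         "popfl\n\t"
                         "pushfl\n\t"
                         "popl %1\n\t"       /* after = EFLAGS            */
                         "popfl"             /* restore the original      */
                         : "=&r" (before), "=&r" (after)
                         : "ir" (1ul << 21));         /* X86_EFLAGS_ID */
            return (before ^ after) != 0;
    }

    int main(void)
    {
            printf("CPUID is %savailable\n", has_cpuid() ? "" : "not ");
            return 0;
    }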
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431fb505..675a05012449 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -19,24 +19,17 @@ | |||
19 | #include <asm/fpu-internal.h> | 19 | #include <asm/fpu-internal.h> |
20 | #include <asm/user.h> | 20 | #include <asm/user.h> |
21 | 21 | ||
22 | #ifdef CONFIG_X86_64 | ||
23 | # include <asm/sigcontext32.h> | ||
24 | # include <asm/user32.h> | ||
25 | #else | ||
26 | # define save_i387_xstate_ia32 save_i387_xstate | ||
27 | # define restore_i387_xstate_ia32 restore_i387_xstate | ||
28 | # define _fpstate_ia32 _fpstate | ||
29 | # define _xstate_ia32 _xstate | ||
30 | # define sig_xstate_ia32_size sig_xstate_size | ||
31 | # define fx_sw_reserved_ia32 fx_sw_reserved | ||
32 | # define user_i387_ia32_struct user_i387_struct | ||
33 | # define user32_fxsr_struct user_fxsr_struct | ||
34 | #endif | ||
35 | |||
36 | /* | 22 | /* |
37 | * Were we in an interrupt that interrupted kernel mode? | 23 | * Were we in an interrupt that interrupted kernel mode? |
38 | * | 24 | * |
39 | * We can do a kernel_fpu_begin/end() pair *ONLY* if that | 25 | * For now, with eagerfpu we will return interrupted kernel FPU |
26 | * state as not-idle. TBD: Ideally we can change the return value | ||
27 | * to something like __thread_has_fpu(current). But we need to | ||
28 | * be careful of doing __thread_clear_has_fpu() before saving | ||
29 | * the FPU etc for supporting nested uses etc. For now, take | ||
30 | * the simple route! | ||
31 | * | ||
32 | * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
40 | * pair does nothing at all: the thread must not have fpu (so | 33 | * pair does nothing at all: the thread must not have fpu (so |
41 | * that we don't try to save the FPU state), and TS must | 34 | * that we don't try to save the FPU state), and TS must |
42 | * be set (so that the clts/stts pair does nothing that is | 35 | * be set (so that the clts/stts pair does nothing that is |
@@ -44,6 +37,9 @@ | |||
44 | */ | 37 | */ |
45 | static inline bool interrupted_kernel_fpu_idle(void) | 38 | static inline bool interrupted_kernel_fpu_idle(void) |
46 | { | 39 | { |
40 | if (use_eager_fpu()) | ||
41 | return 0; | ||
42 | |||
47 | return !__thread_has_fpu(current) && | 43 | return !__thread_has_fpu(current) && |
48 | (read_cr0() & X86_CR0_TS); | 44 | (read_cr0() & X86_CR0_TS); |
49 | } | 45 | } |
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void) | |||
77 | } | 73 | } |
78 | EXPORT_SYMBOL(irq_fpu_usable); | 74 | EXPORT_SYMBOL(irq_fpu_usable); |
79 | 75 | ||
80 | void kernel_fpu_begin(void) | 76 | void __kernel_fpu_begin(void) |
81 | { | 77 | { |
82 | struct task_struct *me = current; | 78 | struct task_struct *me = current; |
83 | 79 | ||
84 | WARN_ON_ONCE(!irq_fpu_usable()); | ||
85 | preempt_disable(); | ||
86 | if (__thread_has_fpu(me)) { | 80 | if (__thread_has_fpu(me)) { |
87 | __save_init_fpu(me); | 81 | __save_init_fpu(me); |
88 | __thread_clear_has_fpu(me); | 82 | __thread_clear_has_fpu(me); |
89 | /* We do 'stts()' in kernel_fpu_end() */ | 83 | /* We do 'stts()' in __kernel_fpu_end() */ |
90 | } else { | 84 | } else if (!use_eager_fpu()) { |
91 | this_cpu_write(fpu_owner_task, NULL); | 85 | this_cpu_write(fpu_owner_task, NULL); |
92 | clts(); | 86 | clts(); |
93 | } | 87 | } |
94 | } | 88 | } |
95 | EXPORT_SYMBOL(kernel_fpu_begin); | 89 | EXPORT_SYMBOL(__kernel_fpu_begin); |
96 | 90 | ||
97 | void kernel_fpu_end(void) | 91 | void __kernel_fpu_end(void) |
98 | { | 92 | { |
99 | stts(); | 93 | if (use_eager_fpu()) |
100 | preempt_enable(); | 94 | math_state_restore(); |
95 | else | ||
96 | stts(); | ||
101 | } | 97 | } |
102 | EXPORT_SYMBOL(kernel_fpu_end); | 98 | EXPORT_SYMBOL(__kernel_fpu_end); |
103 | 99 | ||
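Splitting kernel_fpu_begin/end into __ variants moves the preemption and usability checks out of the core so that callers which already run with preemption disabled (the KVM and crypto paths motivating this change) can use the bare versions. The outer wrappers become small inlines, roughly as follows (see asm/i387.h in this release; a sketch, not verbatim):

    static inline void kernel_fpu_begin(void)
    {
            WARN_ON_ONCE(!irq_fpu_usable());
            preempt_disable();
            __kernel_fpu_begin();
    }

    static inline void kernel_fpu_end(void)
    {
            __kernel_fpu_end();
            preempt_enable();
    }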
104 | void unlazy_fpu(struct task_struct *tsk) | 100 | void unlazy_fpu(struct task_struct *tsk) |
105 | { | 101 | { |
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) | |||
113 | } | 109 | } |
114 | EXPORT_SYMBOL(unlazy_fpu); | 110 | EXPORT_SYMBOL(unlazy_fpu); |
115 | 111 | ||
116 | #ifdef CONFIG_MATH_EMULATION | 112 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
117 | # define HAVE_HWFP (boot_cpu_data.hard_math) | ||
118 | #else | ||
119 | # define HAVE_HWFP 1 | ||
120 | #endif | ||
121 | |||
122 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | ||
123 | unsigned int xstate_size; | 113 | unsigned int xstate_size; |
124 | EXPORT_SYMBOL_GPL(xstate_size); | 114 | EXPORT_SYMBOL_GPL(xstate_size); |
125 | unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); | ||
126 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; | 115 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; |
127 | 116 | ||
128 | static void __cpuinit mxcsr_feature_mask_init(void) | 117 | static void __cpuinit mxcsr_feature_mask_init(void) |
129 | { | 118 | { |
130 | unsigned long mask = 0; | 119 | unsigned long mask = 0; |
131 | 120 | ||
132 | clts(); | ||
133 | if (cpu_has_fxsr) { | 121 | if (cpu_has_fxsr) { |
134 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | 122 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); |
135 | asm volatile("fxsave %0" : : "m" (fx_scratch)); | 123 | asm volatile("fxsave %0" : : "m" (fx_scratch)); |
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) | |||
138 | mask = 0x0000ffbf; | 126 | mask = 0x0000ffbf; |
139 | } | 127 | } |
140 | mxcsr_feature_mask &= mask; | 128 | mxcsr_feature_mask &= mask; |
141 | stts(); | ||
142 | } | 129 | } |
143 | 130 | ||
144 | static void __cpuinit init_thread_xstate(void) | 131 | static void __cpuinit init_thread_xstate(void) |
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) | |||
192 | init_thread_xstate(); | 179 | init_thread_xstate(); |
193 | 180 | ||
194 | mxcsr_feature_mask_init(); | 181 | mxcsr_feature_mask_init(); |
195 | /* clean state in init */ | 182 | xsave_init(); |
196 | current_thread_info()->status = 0; | 183 | eager_fpu_init(); |
197 | clear_used_math(); | ||
198 | } | 184 | } |
199 | 185 | ||
200 | void fpu_finit(struct fpu *fpu) | 186 | void fpu_finit(struct fpu *fpu) |
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) | |||
205 | } | 191 | } |
206 | 192 | ||
207 | if (cpu_has_fxsr) { | 193 | if (cpu_has_fxsr) { |
208 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | 194 | fx_finit(&fpu->state->fxsave); |
209 | |||
210 | memset(fx, 0, xstate_size); | ||
211 | fx->cwd = 0x37f; | ||
212 | if (cpu_has_xmm) | ||
213 | fx->mxcsr = MXCSR_DEFAULT; | ||
214 | } else { | 195 | } else { |
215 | struct i387_fsave_struct *fp = &fpu->state->fsave; | 196 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
216 | memset(fp, 0, xstate_size); | 197 | memset(fp, 0, xstate_size); |
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
454 | * FXSR floating point environment conversions. | 435 | * FXSR floating point environment conversions. |
455 | */ | 436 | */ |
456 | 437 | ||
457 | static void | 438 | void |
458 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 439 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
459 | { | 440 | { |
460 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | 441 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | |||
491 | memcpy(&to[i], &from[i], sizeof(to[0])); | 472 | memcpy(&to[i], &from[i], sizeof(to[0])); |
492 | } | 473 | } |
493 | 474 | ||
494 | static void convert_to_fxsr(struct task_struct *tsk, | 475 | void convert_to_fxsr(struct task_struct *tsk, |
495 | const struct user_i387_ia32_struct *env) | 476 | const struct user_i387_ia32_struct *env) |
496 | 477 | ||
497 | { | 478 | { |
498 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | 479 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
@@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
589 | } | 570 | } |
590 | 571 | ||
591 | /* | 572 | /* |
592 | * Signal frame handlers. | ||
593 | */ | ||
594 | |||
595 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
596 | { | ||
597 | struct task_struct *tsk = current; | ||
598 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; | ||
599 | |||
600 | fp->status = fp->swd; | ||
601 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | ||
602 | return -1; | ||
603 | return 1; | ||
604 | } | ||
605 | |||
606 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | ||
607 | { | ||
608 | struct task_struct *tsk = current; | ||
609 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | ||
610 | struct user_i387_ia32_struct env; | ||
611 | int err = 0; | ||
612 | |||
613 | convert_from_fxsr(&env, tsk); | ||
614 | if (__copy_to_user(buf, &env, sizeof(env))) | ||
615 | return -1; | ||
616 | |||
617 | err |= __put_user(fx->swd, &buf->status); | ||
618 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); | ||
619 | if (err) | ||
620 | return -1; | ||
621 | |||
622 | if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) | ||
623 | return -1; | ||
624 | return 1; | ||
625 | } | ||
626 | |||
627 | static int save_i387_xsave(void __user *buf) | ||
628 | { | ||
629 | struct task_struct *tsk = current; | ||
630 | struct _fpstate_ia32 __user *fx = buf; | ||
631 | int err = 0; | ||
632 | |||
633 | |||
634 | sanitize_i387_state(tsk); | ||
635 | |||
636 | /* | ||
637 | * For legacy compatibility, we always set FP/SSE bits in the bit | ||
638 | * vector while saving the state to the user context. | ||
639 | * This will enable us to capture any changes (during sigreturn) to | ||
640 | * the FP/SSE bits by the legacy applications which don't touch | ||
641 | * xstate_bv in the xsave header. | ||
642 | * | ||
643 | * xsave aware applications can change the xstate_bv in the xsave | ||
644 | * header as well as change any contents in the memory layout. | ||
645 | * xrestore as part of sigreturn will capture all the changes. | ||
646 | */ | ||
647 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | ||
648 | |||
649 | if (save_i387_fxsave(fx) < 0) | ||
650 | return -1; | ||
651 | |||
652 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, | ||
653 | sizeof(struct _fpx_sw_bytes)); | ||
654 | err |= __put_user(FP_XSTATE_MAGIC2, | ||
655 | (__u32 __user *) (buf + sig_xstate_ia32_size | ||
656 | - FP_XSTATE_MAGIC2_SIZE)); | ||
657 | if (err) | ||
658 | return -1; | ||
659 | |||
660 | return 1; | ||
661 | } | ||
662 | |||
663 | int save_i387_xstate_ia32(void __user *buf) | ||
664 | { | ||
665 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
666 | struct task_struct *tsk = current; | ||
667 | |||
668 | if (!used_math()) | ||
669 | return 0; | ||
670 | |||
671 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) | ||
672 | return -EACCES; | ||
673 | /* | ||
674 | * This will cause a "finit" to be triggered by the next | ||
675 | * attempted FPU operation by the 'current' process. | ||
676 | */ | ||
677 | clear_used_math(); | ||
678 | |||
679 | if (!HAVE_HWFP) { | ||
680 | return fpregs_soft_get(current, NULL, | ||
681 | 0, sizeof(struct user_i387_ia32_struct), | ||
682 | NULL, fp) ? -1 : 1; | ||
683 | } | ||
684 | |||
685 | unlazy_fpu(tsk); | ||
686 | |||
687 | if (cpu_has_xsave) | ||
688 | return save_i387_xsave(fp); | ||
689 | if (cpu_has_fxsr) | ||
690 | return save_i387_fxsave(fp); | ||
691 | else | ||
692 | return save_i387_fsave(fp); | ||
693 | } | ||
694 | |||
695 | static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
696 | { | ||
697 | struct task_struct *tsk = current; | ||
698 | |||
699 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, | ||
700 | sizeof(struct i387_fsave_struct)); | ||
701 | } | ||
702 | |||
703 | static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | ||
704 | unsigned int size) | ||
705 | { | ||
706 | struct task_struct *tsk = current; | ||
707 | struct user_i387_ia32_struct env; | ||
708 | int err; | ||
709 | |||
710 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], | ||
711 | size); | ||
712 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
713 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; | ||
714 | if (err || __copy_from_user(&env, buf, sizeof(env))) | ||
715 | return 1; | ||
716 | convert_to_fxsr(tsk, &env); | ||
717 | |||
718 | return 0; | ||
719 | } | ||
720 | |||
721 | static int restore_i387_xsave(void __user *buf) | ||
722 | { | ||
723 | struct _fpx_sw_bytes fx_sw_user; | ||
724 | struct _fpstate_ia32 __user *fx_user = | ||
725 | ((struct _fpstate_ia32 __user *) buf); | ||
726 | struct i387_fxsave_struct __user *fx = | ||
727 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | ||
728 | struct xsave_hdr_struct *xsave_hdr = | ||
729 | ¤t->thread.fpu.state->xsave.xsave_hdr; | ||
730 | u64 mask; | ||
731 | int err; | ||
732 | |||
733 | if (check_for_xstate(fx, buf, &fx_sw_user)) | ||
734 | goto fx_only; | ||
735 | |||
736 | mask = fx_sw_user.xstate_bv; | ||
737 | |||
738 | err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); | ||
739 | |||
740 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
741 | /* | ||
742 | * These bits must be zero. | ||
743 | */ | ||
744 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
745 | |||
746 | /* | ||
747 | * Init the state that is not present in the memory layout | ||
748 | * and enabled by the OS. | ||
749 | */ | ||
750 | mask = ~(pcntxt_mask & ~mask); | ||
751 | xsave_hdr->xstate_bv &= mask; | ||
752 | |||
753 | return err; | ||
754 | fx_only: | ||
755 | /* | ||
756 | * Couldn't find the extended state information in the memory | ||
757 | * layout. Restore the FP/SSE and init the other extended state | ||
758 | * enabled by the OS. | ||
759 | */ | ||
760 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
761 | return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); | ||
762 | } | ||
763 | |||
764 | int restore_i387_xstate_ia32(void __user *buf) | ||
765 | { | ||
766 | int err; | ||
767 | struct task_struct *tsk = current; | ||
768 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
769 | |||
770 | if (HAVE_HWFP) | ||
771 | clear_fpu(tsk); | ||
772 | |||
773 | if (!buf) { | ||
774 | if (used_math()) { | ||
775 | clear_fpu(tsk); | ||
776 | clear_used_math(); | ||
777 | } | ||
778 | |||
779 | return 0; | ||
780 | } else | ||
781 | if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) | ||
782 | return -EACCES; | ||
783 | |||
784 | if (!used_math()) { | ||
785 | err = init_fpu(tsk); | ||
786 | if (err) | ||
787 | return err; | ||
788 | } | ||
789 | |||
790 | if (HAVE_HWFP) { | ||
791 | if (cpu_has_xsave) | ||
792 | err = restore_i387_xsave(buf); | ||
793 | else if (cpu_has_fxsr) | ||
794 | err = restore_i387_fxsave(fp, sizeof(struct | ||
795 | i387_fxsave_struct)); | ||
796 | else | ||
797 | err = restore_i387_fsave(fp); | ||
798 | } else { | ||
799 | err = fpregs_soft_set(current, NULL, | ||
800 | 0, sizeof(struct user_i387_ia32_struct), | ||
801 | NULL, fp) != 0; | ||
802 | } | ||
803 | set_used_math(); | ||
804 | |||
805 | return err; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * FPU state for core dumps. | 573 | * FPU state for core dumps. |
810 | * This is only used for a.out dumps now. | 574 | * This is only used for a.out dumps now. |
811 | * It is declared generically using elf_fpregset_t (which is | 575 | * It is declared generically using elf_fpregset_t (which is |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91af..9a5c460404dc 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void) | |||
263 | * out of. | 263 | * out of. |
264 | */ | 264 | */ |
265 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 265 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
266 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ | 266 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
267 | } | 267 | } |
268 | 268 | ||
269 | static struct syscore_ops i8259_syscore_ops = { | 269 | static struct syscore_ops i8259_syscore_ops = { |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d44f7829968e..e4595f105910 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, " Rescheduling interrupts\n"); | 92 | seq_printf(p, " Rescheduling interrupts\n"); |
93 | seq_printf(p, "%*s: ", prec, "CAL"); | 93 | seq_printf(p, "%*s: ", prec, "CAL"); |
94 | for_each_online_cpu(j) | 94 | for_each_online_cpu(j) |
95 | seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); | 95 | seq_printf(p, "%10u ", irq_stats(j)->irq_call_count - |
96 | irq_stats(j)->irq_tlb_count); | ||
96 | seq_printf(p, " Function call interrupts\n"); | 97 | seq_printf(p, " Function call interrupts\n"); |
97 | seq_printf(p, "%*s: ", prec, "TLB"); | 98 | seq_printf(p, "%*s: ", prec, "TLB"); |
98 | for_each_online_cpu(j) | 99 | for_each_online_cpu(j) |
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
147 | #ifdef CONFIG_SMP | 148 | #ifdef CONFIG_SMP |
148 | sum += irq_stats(cpu)->irq_resched_count; | 149 | sum += irq_stats(cpu)->irq_resched_count; |
149 | sum += irq_stats(cpu)->irq_call_count; | 150 | sum += irq_stats(cpu)->irq_call_count; |
150 | sum += irq_stats(cpu)->irq_tlb_count; | ||
151 | #endif | 151 | #endif |
152 | #ifdef CONFIG_X86_THERMAL_VECTOR | 152 | #ifdef CONFIG_X86_THERMAL_VECTOR |
153 | sum += irq_stats(cpu)->irq_thermal_count; | 153 | sum += irq_stats(cpu)->irq_thermal_count; |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 3f61904365cf..836f8322960e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) | |||
746 | int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) | 746 | int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) |
747 | { | 747 | { |
748 | int err; | 748 | int err; |
749 | #ifdef CONFIG_DEBUG_RODATA | ||
749 | char opc[BREAK_INSTR_SIZE]; | 750 | char opc[BREAK_INSTR_SIZE]; |
751 | #endif /* CONFIG_DEBUG_RODATA */ | ||
750 | 752 | ||
751 | bpt->type = BP_BREAKPOINT; | 753 | bpt->type = BP_BREAKPOINT; |
752 | err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, | 754 | err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b1..57916c0d3cf6 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb | |||
541 | return 1; | 541 | return 1; |
542 | } | 542 | } |
543 | 543 | ||
544 | #ifdef KPROBES_CAN_USE_FTRACE | ||
545 | static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
546 | struct kprobe_ctlblk *kcb) | ||
547 | { | ||
548 | /* | ||
549 | * Emulate singlestep (and also recover regs->ip) | ||
550 | * as if there is a 5-byte nop | ||
551 | */ | ||
552 | regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; | ||
553 | if (unlikely(p->post_handler)) { | ||
554 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
555 | p->post_handler(p, regs, 0); | ||
556 | } | ||
557 | __this_cpu_write(current_kprobe, NULL); | ||
558 | } | ||
559 | #endif | ||
560 | |||
544 | /* | 561 | /* |
545 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | 562 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they |
546 | * remain disabled throughout this function. | 563 | * remain disabled throughout this function. |
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
599 | } else if (kprobe_running()) { | 616 | } else if (kprobe_running()) { |
600 | p = __this_cpu_read(current_kprobe); | 617 | p = __this_cpu_read(current_kprobe); |
601 | if (p->break_handler && p->break_handler(p, regs)) { | 618 | if (p->break_handler && p->break_handler(p, regs)) { |
619 | #ifdef KPROBES_CAN_USE_FTRACE | ||
620 | if (kprobe_ftrace(p)) { | ||
621 | skip_singlestep(p, regs, kcb); | ||
622 | return 1; | ||
623 | } | ||
624 | #endif | ||
602 | setup_singlestep(p, regs, kcb, 0); | 625 | setup_singlestep(p, regs, kcb, 0); |
603 | return 1; | 626 | return 1; |
604 | } | 627 | } |
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1052 | return 0; | 1075 | return 0; |
1053 | } | 1076 | } |
1054 | 1077 | ||
1078 | #ifdef KPROBES_CAN_USE_FTRACE | ||
1079 | /* Ftrace callback handler for kprobes */ | ||
1080 | void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, | ||
1081 | struct ftrace_ops *ops, struct pt_regs *regs) | ||
1082 | { | ||
1083 | struct kprobe *p; | ||
1084 | struct kprobe_ctlblk *kcb; | ||
1085 | unsigned long flags; | ||
1086 | |||
1087 | /* Disable irq for emulating a breakpoint and avoiding preempt */ | ||
1088 | local_irq_save(flags); | ||
1089 | |||
1090 | p = get_kprobe((kprobe_opcode_t *)ip); | ||
1091 | if (unlikely(!p) || kprobe_disabled(p)) | ||
1092 | goto end; | ||
1093 | |||
1094 | kcb = get_kprobe_ctlblk(); | ||
1095 | if (kprobe_running()) { | ||
1096 | kprobes_inc_nmissed_count(p); | ||
1097 | } else { | ||
1098 | /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ | ||
1099 | regs->ip = ip + sizeof(kprobe_opcode_t); | ||
1100 | |||
1101 | __this_cpu_write(current_kprobe, p); | ||
1102 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1103 | if (!p->pre_handler || !p->pre_handler(p, regs)) | ||
1104 | skip_singlestep(p, regs, kcb); | ||
1105 | /* | ||
1106 | * If pre_handler returns !0, it sets regs->ip and | ||
1107 | * resets current kprobe. | ||
1108 | */ | ||
1109 | } | ||
1110 | end: | ||
1111 | local_irq_restore(flags); | ||
1112 | } | ||
1113 | |||
1114 | int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) | ||
1115 | { | ||
1116 | p->ainsn.insn = NULL; | ||
1117 | p->ainsn.boostable = -1; | ||
1118 | return 0; | ||
1119 | } | ||
1120 | #endif | ||
1121 | |||
1055 | int __init arch_init_kprobes(void) | 1122 | int __init arch_init_kprobes(void) |
1056 | { | 1123 | { |
1057 | return arch_init_optprobes(); | 1124 | return arch_init_optprobes(); |
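With KPROBES_CAN_USE_FTRACE, a probe placed on a function's mcount/fentry site is dispatched from kprobe_ftrace_handler() above rather than through an int3 breakpoint, and skip_singlestep() fakes the single-step by advancing regs->ip past the 5-byte call. Registration is unchanged for users; a minimal hedged example (the probed symbol is an arbitrary choice):

    #include <linux/module.h>
    #include <linux/kprobes.h>

    static int my_pre(struct kprobe *p, struct pt_regs *regs)
    {
            pr_info("hit %s, ip=%lx\n", p->symbol_name, regs->ip);
            return 0;   /* 0: let kprobes (or skip_singlestep) continue */
    }

    static struct kprobe kp = {
            .symbol_name = "do_fork",   /* any ftrace-able function entry */
            .pre_handler = my_pre,
    };

    static int __init probe_init(void) { return register_kprobe(&kp); }
    static void __exit probe_exit(void) { unregister_kprobe(&kp); }
    module_init(probe_init);
    module_exit(probe_exit);
    MODULE_LICENSE("GPL");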
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c1d61ee4b4f1..b3e5e51bc907 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused) | |||
354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | 354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | 355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); |
356 | kvm_pv_disable_apf(); | 356 | kvm_pv_disable_apf(); |
357 | kvm_disable_steal_time(); | ||
357 | } | 358 | } |
358 | 359 | ||
359 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | 360 | static int kvm_pv_reboot_notify(struct notifier_block *nb, |
@@ -396,9 +397,7 @@ void kvm_disable_steal_time(void) | |||
396 | #ifdef CONFIG_SMP | 397 | #ifdef CONFIG_SMP |
397 | static void __init kvm_smp_prepare_boot_cpu(void) | 398 | static void __init kvm_smp_prepare_boot_cpu(void) |
398 | { | 399 | { |
399 | #ifdef CONFIG_KVM_CLOCK | ||
400 | WARN_ON(kvm_register_clock("primary cpu clock")); | 400 | WARN_ON(kvm_register_clock("primary cpu clock")); |
401 | #endif | ||
402 | kvm_guest_cpu_init(); | 401 | kvm_guest_cpu_init(); |
403 | native_smp_prepare_boot_cpu(); | 402 | native_smp_prepare_boot_cpu(); |
404 | } | 403 | } |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 82746f942cd8..7720ff5a9ee2 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -75,20 +75,113 @@ struct microcode_amd { | |||
75 | 75 | ||
76 | static struct equiv_cpu_entry *equiv_cpu_table; | 76 | static struct equiv_cpu_entry *equiv_cpu_table; |
77 | 77 | ||
78 | /* page-sized ucode patch buffer */ | 78 | struct ucode_patch { |
79 | void *patch; | 79 | struct list_head plist; |
80 | void *data; | ||
81 | u32 patch_id; | ||
82 | u16 equiv_cpu; | ||
83 | }; | ||
84 | |||
85 | static LIST_HEAD(pcache); | ||
86 | |||
87 | static u16 find_equiv_id(unsigned int cpu) | ||
88 | { | ||
89 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
90 | int i = 0; | ||
91 | |||
92 | if (!equiv_cpu_table) | ||
93 | return 0; | ||
94 | |||
95 | while (equiv_cpu_table[i].installed_cpu != 0) { | ||
96 | if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) | ||
97 | return equiv_cpu_table[i].equiv_cpu; | ||
98 | |||
99 | i++; | ||
100 | } | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) | ||
105 | { | ||
106 | int i = 0; | ||
107 | |||
108 | BUG_ON(!equiv_cpu_table); | ||
109 | |||
110 | while (equiv_cpu_table[i].equiv_cpu != 0) { | ||
111 | if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) | ||
112 | return equiv_cpu_table[i].installed_cpu; | ||
113 | i++; | ||
114 | } | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * a small, trivial cache of per-family ucode patches | ||
120 | */ | ||
121 | static struct ucode_patch *cache_find_patch(u16 equiv_cpu) | ||
122 | { | ||
123 | struct ucode_patch *p; | ||
124 | |||
125 | list_for_each_entry(p, &pcache, plist) | ||
126 | if (p->equiv_cpu == equiv_cpu) | ||
127 | return p; | ||
128 | return NULL; | ||
129 | } | ||
130 | |||
131 | static void update_cache(struct ucode_patch *new_patch) | ||
132 | { | ||
133 | struct ucode_patch *p; | ||
134 | |||
135 | list_for_each_entry(p, &pcache, plist) { | ||
136 | if (p->equiv_cpu == new_patch->equiv_cpu) { | ||
137 | if (p->patch_id >= new_patch->patch_id) | ||
138 | /* we already have the latest patch */ | ||
139 | return; | ||
140 | |||
141 | list_replace(&p->plist, &new_patch->plist); | ||
142 | kfree(p->data); | ||
143 | kfree(p); | ||
144 | return; | ||
145 | } | ||
146 | } | ||
147 | /* no patch found, add it */ | ||
148 | list_add_tail(&new_patch->plist, &pcache); | ||
149 | } | ||
150 | |||
151 | static void free_cache(void) | ||
152 | { | ||
153 | struct ucode_patch *p, *tmp; | ||
154 | |||
155 | list_for_each_entry_safe(p, tmp, &pcache, plist) { | ||
156 | __list_del(p->plist.prev, p->plist.next); | ||
157 | kfree(p->data); | ||
158 | kfree(p); | ||
159 | } | ||
160 | } | ||
161 | |||
162 | static struct ucode_patch *find_patch(unsigned int cpu) | ||
163 | { | ||
164 | u16 equiv_id; | ||
165 | |||
166 | equiv_id = find_equiv_id(cpu); | ||
167 | if (!equiv_id) | ||
168 | return NULL; | ||
169 | |||
170 | return cache_find_patch(equiv_id); | ||
171 | } | ||
80 | 172 | ||
81 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 173 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
82 | { | 174 | { |
83 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 175 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
84 | 176 | ||
177 | csig->sig = cpuid_eax(0x00000001); | ||
85 | csig->rev = c->microcode; | 178 | csig->rev = c->microcode; |
86 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); | 179 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); |
87 | 180 | ||
88 | return 0; | 181 | return 0; |
89 | } | 182 | } |
90 | 183 | ||
91 | static unsigned int verify_ucode_size(int cpu, u32 patch_size, | 184 | static unsigned int verify_patch_size(int cpu, u32 patch_size, |
92 | unsigned int size) | 185 | unsigned int size) |
93 | { | 186 | { |
94 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 187 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
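The list above replaces the old single page-sized patch buffer with a cache keyed by equivalence ID, and update_cache() keeps only the newest patch_id per entry. A self-contained model of those semantics in plain C (the IDs are made up; the kernel version uses list_head and kfree):

    #include <stdio.h>
    #include <stdlib.h>

    struct patch {
            unsigned short equiv_cpu;
            unsigned int   patch_id;
            struct patch  *next;
    };

    static struct patch *pcache;

    static void update_cache(unsigned short equiv_cpu, unsigned int patch_id)
    {
            struct patch *p;

            for (p = pcache; p; p = p->next) {
                    if (p->equiv_cpu != equiv_cpu)
                            continue;
                    if (p->patch_id < patch_id)      /* only replace with newer */
                            p->patch_id = patch_id;
                    return;
            }
            p = malloc(sizeof(*p));                  /* no entry yet: add */
            p->equiv_cpu = equiv_cpu;
            p->patch_id  = patch_id;
            p->next      = pcache;
            pcache       = p;
    }

    int main(void)
    {
            update_cache(0x5b40, 0x0500010d);
            update_cache(0x5b40, 0x05000101);        /* older: ignored */
            printf("cached patch_id=%#x\n", pcache->patch_id);
            return 0;
    }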
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, | |||
118 | return patch_size; | 211 | return patch_size; |
119 | } | 212 | } |
120 | 213 | ||
121 | static u16 find_equiv_id(void) | 214 | static int apply_microcode_amd(int cpu) |
122 | { | 215 | { |
123 | unsigned int current_cpu_id, i = 0; | 216 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
124 | 217 | struct microcode_amd *mc_amd; | |
125 | BUG_ON(equiv_cpu_table == NULL); | 218 | struct ucode_cpu_info *uci; |
126 | 219 | struct ucode_patch *p; | |
127 | current_cpu_id = cpuid_eax(0x00000001); | 220 | u32 rev, dummy; |
128 | |||
129 | while (equiv_cpu_table[i].installed_cpu != 0) { | ||
130 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) | ||
131 | return equiv_cpu_table[i].equiv_cpu; | ||
132 | |||
133 | i++; | ||
134 | } | ||
135 | return 0; | ||
136 | } | ||
137 | 221 | ||
138 | /* | 222 | BUG_ON(raw_smp_processor_id() != cpu); |
139 | * we signal a good patch is found by returning its size > 0 | ||
140 | */ | ||
141 | static int get_matching_microcode(int cpu, const u8 *ucode_ptr, | ||
142 | unsigned int leftover_size, int rev, | ||
143 | unsigned int *current_size) | ||
144 | { | ||
145 | struct microcode_header_amd *mc_hdr; | ||
146 | unsigned int actual_size, patch_size; | ||
147 | u16 equiv_cpu_id; | ||
148 | 223 | ||
149 | /* size of the current patch we're staring at */ | 224 | uci = ucode_cpu_info + cpu; |
150 | patch_size = *(u32 *)(ucode_ptr + 4); | ||
151 | *current_size = patch_size + SECTION_HDR_SIZE; | ||
152 | 225 | ||
153 | equiv_cpu_id = find_equiv_id(); | 226 | p = find_patch(cpu); |
154 | if (!equiv_cpu_id) | 227 | if (!p) |
155 | return 0; | 228 | return 0; |
156 | 229 | ||
157 | /* | 230 | mc_amd = p->data; |
158 | * let's look at the patch header itself now | 231 | uci->mc = p->data; |
159 | */ | ||
160 | mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); | ||
161 | 232 | ||
162 | if (mc_hdr->processor_rev_id != equiv_cpu_id) | 233 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
163 | return 0; | ||
164 | 234 | ||
165 | /* ucode might be chipset specific -- currently we don't support this */ | 235 | /* need to apply patch? */ |
166 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { | 236 | if (rev >= mc_amd->hdr.patch_id) { |
167 | pr_err("CPU%d: chipset specific code not yet supported\n", | 237 | c->microcode = rev; |
168 | cpu); | ||
169 | return 0; | 238 | return 0; |
170 | } | 239 | } |
171 | 240 | ||
172 | if (mc_hdr->patch_id <= rev) | ||
173 | return 0; | ||
174 | |||
175 | /* | ||
176 | * now that the header looks sane, verify its size | ||
177 | */ | ||
178 | actual_size = verify_ucode_size(cpu, patch_size, leftover_size); | ||
179 | if (!actual_size) | ||
180 | return 0; | ||
181 | |||
182 | /* clear the patch buffer */ | ||
183 | memset(patch, 0, PAGE_SIZE); | ||
184 | |||
185 | /* all looks ok, get the binary patch */ | ||
186 | get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); | ||
187 | |||
188 | return actual_size; | ||
189 | } | ||
190 | |||
191 | static int apply_microcode_amd(int cpu) | ||
192 | { | ||
193 | u32 rev, dummy; | ||
194 | int cpu_num = raw_smp_processor_id(); | ||
195 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
196 | struct microcode_amd *mc_amd = uci->mc; | ||
197 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
198 | |||
199 | /* We should bind the task to the CPU */ | ||
200 | BUG_ON(cpu_num != cpu); | ||
201 | |||
202 | if (mc_amd == NULL) | ||
203 | return 0; | ||
204 | |||
205 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | 241 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); |
206 | /* get patch id after patching */ | ||
207 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | ||
208 | 242 | ||
209 | /* check current patch id and patch's id for match */ | 243 | /* verify patch application was successful */ |
244 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | ||
210 | if (rev != mc_amd->hdr.patch_id) { | 245 | if (rev != mc_amd->hdr.patch_id) { |
211 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", | 246 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", |
212 | cpu, mc_amd->hdr.patch_id); | 247 | cpu, mc_amd->hdr.patch_id); |
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
238 | return -ENOMEM; | 273 | return -ENOMEM; |
239 | } | 274 | } |
240 | 275 | ||
241 | get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); | 276 | memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); |
242 | 277 | ||
243 | /* add header length */ | 278 | /* add header length */ |
244 | return size + CONTAINER_HDR_SZ; | 279 | return size + CONTAINER_HDR_SZ; |
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void) | |||
250 | equiv_cpu_table = NULL; | 285 | equiv_cpu_table = NULL; |
251 | } | 286 | } |
252 | 287 | ||
253 | static enum ucode_state | 288 | static void cleanup(void) |
254 | generic_load_microcode(int cpu, const u8 *data, size_t size) | ||
255 | { | 289 | { |
256 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 290 | free_equiv_cpu_table(); |
257 | struct microcode_header_amd *mc_hdr = NULL; | 291 | free_cache(); |
258 | unsigned int mc_size, leftover, current_size = 0; | 292 | } |
293 | |||
294 | /* | ||
295 | * We return the current size even if some of the checks failed so that | ||
296 | * we can skip to the next patch. If we return a negative value, we | ||
297 | * signal a grave error like a memory allocation has failed and the | ||
298 | * driver cannot continue functioning normally. In such cases, we tear | ||
299 | * down everything we've used up so far and exit. | ||
300 | */ | ||
301 | static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) | ||
302 | { | ||
303 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
304 | struct microcode_header_amd *mc_hdr; | ||
305 | struct ucode_patch *patch; | ||
306 | unsigned int patch_size, crnt_size, ret; | ||
307 | u32 proc_fam; | ||
308 | u16 proc_id; | ||
309 | |||
310 | patch_size = *(u32 *)(fw + 4); | ||
311 | crnt_size = patch_size + SECTION_HDR_SIZE; | ||
312 | mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); | ||
313 | proc_id = mc_hdr->processor_rev_id; | ||
314 | |||
315 | proc_fam = find_cpu_family_by_equiv_cpu(proc_id); | ||
316 | if (!proc_fam) { | ||
317 | pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); | ||
318 | return crnt_size; | ||
319 | } | ||
320 | |||
321 | /* check if patch is for the current family */ | ||
322 | proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); | ||
323 | if (proc_fam != c->x86) | ||
324 | return crnt_size; | ||
325 | |||
326 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { | ||
327 | pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", | ||
328 | mc_hdr->patch_id); | ||
329 | return crnt_size; | ||
330 | } | ||
331 | |||
332 | ret = verify_patch_size(cpu, patch_size, leftover); | ||
333 | if (!ret) { | ||
334 | pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); | ||
335 | return crnt_size; | ||
336 | } | ||
337 | |||
338 | patch = kzalloc(sizeof(*patch), GFP_KERNEL); | ||
339 | if (!patch) { | ||
340 | pr_err("Patch allocation failure.\n"); | ||
341 | return -EINVAL; | ||
342 | } | ||
343 | |||
344 | patch->data = kzalloc(patch_size, GFP_KERNEL); | ||
345 | if (!patch->data) { | ||
346 | pr_err("Patch data allocation failure.\n"); | ||
347 | kfree(patch); | ||
348 | return -EINVAL; | ||
349 | } | ||
350 | |||
351 | /* All looks ok, copy patch... */ | ||
352 | memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); | ||
353 | INIT_LIST_HEAD(&patch->plist); | ||
354 | patch->patch_id = mc_hdr->patch_id; | ||
355 | patch->equiv_cpu = proc_id; | ||
356 | |||
357 | /* ... and add to cache. */ | ||
358 | update_cache(patch); | ||
359 | |||
360 | return crnt_size; | ||
361 | } | ||
362 | |||
363 | static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) | ||
364 | { | ||
365 | enum ucode_state ret = UCODE_ERROR; | ||
366 | unsigned int leftover; | ||
367 | u8 *fw = (u8 *)data; | ||
368 | int crnt_size = 0; | ||
259 | int offset; | 369 | int offset; |
260 | const u8 *ucode_ptr = data; | ||
261 | void *new_mc = NULL; | ||
262 | unsigned int new_rev = uci->cpu_sig.rev; | ||
263 | enum ucode_state state = UCODE_ERROR; | ||
264 | 370 | ||
265 | offset = install_equiv_cpu_table(ucode_ptr); | 371 | offset = install_equiv_cpu_table(data); |
266 | if (offset < 0) { | 372 | if (offset < 0) { |
267 | pr_err("failed to create equivalent cpu table\n"); | 373 | pr_err("failed to create equivalent cpu table\n"); |
268 | goto out; | 374 | return ret; |
269 | } | 375 | } |
270 | ucode_ptr += offset; | 376 | fw += offset; |
271 | leftover = size - offset; | 377 | leftover = size - offset; |
272 | 378 | ||
273 | if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { | 379 | if (*(u32 *)fw != UCODE_UCODE_TYPE) { |
274 | pr_err("invalid type field in container file section header\n"); | 380 | pr_err("invalid type field in container file section header\n"); |
275 | goto free_table; | 381 | free_equiv_cpu_table(); |
382 | return ret; | ||
276 | } | 383 | } |
277 | 384 | ||
278 | while (leftover) { | 385 | while (leftover) { |
279 | mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, | 386 | crnt_size = verify_and_add_patch(cpu, fw, leftover); |
280 | new_rev, ¤t_size); | 387 | if (crnt_size < 0) |
281 | if (mc_size) { | 388 | return ret; |
282 | mc_hdr = patch; | ||
283 | new_mc = patch; | ||
284 | new_rev = mc_hdr->patch_id; | ||
285 | goto out_ok; | ||
286 | } | ||
287 | |||
288 | ucode_ptr += current_size; | ||
289 | leftover -= current_size; | ||
290 | } | ||
291 | 389 | ||
292 | if (!new_mc) { | 390 | fw += crnt_size; |
293 | state = UCODE_NFOUND; | 391 | leftover -= crnt_size; |
294 | goto free_table; | ||
295 | } | 392 | } |
296 | 393 | ||
297 | out_ok: | 394 | return UCODE_OK; |
298 | uci->mc = new_mc; | ||
299 | state = UCODE_OK; | ||
300 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", | ||
301 | cpu, uci->cpu_sig.rev, new_rev); | ||
302 | |||
303 | free_table: | ||
304 | free_equiv_cpu_table(); | ||
305 | |||
306 | out: | ||
307 | return state; | ||
308 | } | 395 | } |
309 | 396 | ||
310 | /* | 397 | /* |
@@ -315,7 +402,7 @@ out: | |||
315 | * | 402 | * |
316 | * This legacy file is always smaller than 2K in size. | 403 | * This legacy file is always smaller than 2K in size. |
317 | * | 404 | * |
318 | * Starting at family 15h they are in family specific firmware files: | 405 | * Beginning with family 15h, they are in family-specific firmware files: |
319 | * | 406 | * |
320 | * amd-ucode/microcode_amd_fam15h.bin | 407 | * amd-ucode/microcode_amd_fam15h.bin |
321 | * amd-ucode/microcode_amd_fam16h.bin | 408 | * amd-ucode/microcode_amd_fam16h.bin |
@@ -323,12 +410,17 @@ out: | |||
323 | * | 410 | * |
324 | * These might be larger than 2K. | 411 | * These might be larger than 2K. |
325 | */ | 412 | */ |
326 | static enum ucode_state request_microcode_amd(int cpu, struct device *device) | 413 | static enum ucode_state request_microcode_amd(int cpu, struct device *device, |
414 | bool refresh_fw) | ||
327 | { | 415 | { |
328 | char fw_name[36] = "amd-ucode/microcode_amd.bin"; | 416 | char fw_name[36] = "amd-ucode/microcode_amd.bin"; |
329 | const struct firmware *fw; | ||
330 | enum ucode_state ret = UCODE_NFOUND; | ||
331 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 417 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
418 | enum ucode_state ret = UCODE_NFOUND; | ||
419 | const struct firmware *fw; | ||
420 | |||
421 | /* reload ucode container only on the boot cpu */ | ||
422 | if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) | ||
423 | return UCODE_OK; | ||
332 | 424 | ||
333 | if (c->x86 >= 0x15) | 425 | if (c->x86 >= 0x15) |
334 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); | 426 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); |
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) | |||
344 | goto fw_release; | 436 | goto fw_release; |
345 | } | 437 | } |
346 | 438 | ||
347 | ret = generic_load_microcode(cpu, fw->data, fw->size); | 439 | /* free old equiv table */ |
440 | free_equiv_cpu_table(); | ||
441 | |||
442 | ret = load_microcode_amd(cpu, fw->data, fw->size); | ||
443 | if (ret != UCODE_OK) | ||
444 | cleanup(); | ||
348 | 445 | ||
349 | fw_release: | 446 | fw_release: |
350 | release_firmware(fw); | 447 | release_firmware(fw); |
351 | 448 | ||
352 | out: | 449 | out: |
353 | return ret; | 450 | return ret; |
354 | } | 451 | } |
355 | 452 | ||
@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
383 | return NULL; | 480 | return NULL; |
384 | } | 481 | } |
385 | 482 | ||
386 | patch = (void *)get_zeroed_page(GFP_KERNEL); | ||
387 | if (!patch) | ||
388 | return NULL; | ||
389 | |||
390 | return µcode_amd_ops; | 483 | return µcode_amd_ops; |
391 | } | 484 | } |
392 | 485 | ||
393 | void __exit exit_amd_microcode(void) | 486 | void __exit exit_amd_microcode(void) |
394 | { | 487 | { |
395 | free_page((unsigned long)patch); | 488 | cleanup(); |
396 | } | 489 | } |
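Editor's note: the rewrite above replaces the single page-sized `patch` buffer with a generic patch cache. verify_and_add_patch() allocates one struct ucode_patch per valid patch and hands it to update_cache(), while apply_microcode_amd() fetches the right entry via find_patch(). The cache helpers themselves sit outside the quoted hunks; the sketch below shows one plausible shape for update_cache(). The struct fields (plist, data, patch_id, equiv_cpu) are taken from the diff; the list-head name and the keep-the-newest policy are assumptions, not quotations.

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct ucode_patch {
            struct list_head plist;         /* linkage into the patch cache */
            void *data;                     /* verified microcode image */
            u32 patch_id;
            u16 equiv_cpu;
    };

    static LIST_HEAD(pcache);               /* assumed name of the cache list */

    /* Keep at most one patch per equivalence ID, preferring the newest. */
    static void update_cache(struct ucode_patch *new_patch)
    {
            struct ucode_patch *p;

            list_for_each_entry(p, &pcache, plist) {
                    if (p->equiv_cpu != new_patch->equiv_cpu)
                            continue;
                    if (p->patch_id >= new_patch->patch_id) {
                            /* cached copy is newer or equal: drop ours */
                            kfree(new_patch->data);
                            kfree(new_patch);
                            return;
                    }
                    list_replace(&p->plist, &new_patch->plist);
                    kfree(p->data);
                    kfree(p);
                    return;
            }
            list_add_tail(&new_patch->plist, &pcache);
    }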
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 4873e62db6a1..3a04b224d0c0 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, | |||
225 | if (do_microcode_update(buf, len) == 0) | 225 | if (do_microcode_update(buf, len) == 0) |
226 | ret = (ssize_t)len; | 226 | ret = (ssize_t)len; |
227 | 227 | ||
228 | if (ret > 0) | ||
229 | perf_check_microcode(); | ||
230 | |||
228 | mutex_unlock(µcode_mutex); | 231 | mutex_unlock(µcode_mutex); |
229 | put_online_cpus(); | 232 | put_online_cpus(); |
230 | 233 | ||
@@ -276,19 +279,18 @@ static struct platform_device *microcode_pdev; | |||
276 | static int reload_for_cpu(int cpu) | 279 | static int reload_for_cpu(int cpu) |
277 | { | 280 | { |
278 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 281 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
282 | enum ucode_state ustate; | ||
279 | int err = 0; | 283 | int err = 0; |
280 | 284 | ||
281 | if (uci->valid) { | 285 | if (!uci->valid) |
282 | enum ucode_state ustate; | 286 | return err; |
283 | |||
284 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); | ||
285 | if (ustate == UCODE_OK) | ||
286 | apply_microcode_on_target(cpu); | ||
287 | else | ||
288 | if (ustate == UCODE_ERROR) | ||
289 | err = -EINVAL; | ||
290 | } | ||
291 | 287 | ||
288 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); | ||
289 | if (ustate == UCODE_OK) | ||
290 | apply_microcode_on_target(cpu); | ||
291 | else | ||
292 | if (ustate == UCODE_ERROR) | ||
293 | err = -EINVAL; | ||
292 | return err; | 294 | return err; |
293 | } | 295 | } |
294 | 296 | ||
@@ -370,18 +372,15 @@ static void microcode_fini_cpu(int cpu) | |||
370 | 372 | ||
371 | static enum ucode_state microcode_resume_cpu(int cpu) | 373 | static enum ucode_state microcode_resume_cpu(int cpu) |
372 | { | 374 | { |
373 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
374 | |||
375 | if (!uci->mc) | ||
376 | return UCODE_NFOUND; | ||
377 | |||
378 | pr_debug("CPU%d updated upon resume\n", cpu); | 375 | pr_debug("CPU%d updated upon resume\n", cpu); |
379 | apply_microcode_on_target(cpu); | 376 | |
377 | if (apply_microcode_on_target(cpu)) | ||
378 | return UCODE_ERROR; | ||
380 | 379 | ||
381 | return UCODE_OK; | 380 | return UCODE_OK; |
382 | } | 381 | } |
383 | 382 | ||
384 | static enum ucode_state microcode_init_cpu(int cpu) | 383 | static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) |
385 | { | 384 | { |
386 | enum ucode_state ustate; | 385 | enum ucode_state ustate; |
387 | 386 | ||
@@ -392,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu) | |||
392 | if (system_state != SYSTEM_RUNNING) | 391 | if (system_state != SYSTEM_RUNNING) |
393 | return UCODE_NFOUND; | 392 | return UCODE_NFOUND; |
394 | 393 | ||
395 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); | 394 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, |
395 | refresh_fw); | ||
396 | 396 | ||
397 | if (ustate == UCODE_OK) { | 397 | if (ustate == UCODE_OK) { |
398 | pr_debug("CPU%d updated upon init\n", cpu); | 398 | pr_debug("CPU%d updated upon init\n", cpu); |
@@ -405,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu) | |||
405 | static enum ucode_state microcode_update_cpu(int cpu) | 405 | static enum ucode_state microcode_update_cpu(int cpu) |
406 | { | 406 | { |
407 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 407 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
408 | enum ucode_state ustate; | ||
409 | 408 | ||
410 | if (uci->valid) | 409 | if (uci->valid) |
411 | ustate = microcode_resume_cpu(cpu); | 410 | return microcode_resume_cpu(cpu); |
412 | else | ||
413 | ustate = microcode_init_cpu(cpu); | ||
414 | 411 | ||
415 | return ustate; | 412 | return microcode_init_cpu(cpu, false); |
416 | } | 413 | } |
417 | 414 | ||
418 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) | 415 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) |
@@ -428,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) | |||
428 | if (err) | 425 | if (err) |
429 | return err; | 426 | return err; |
430 | 427 | ||
431 | if (microcode_init_cpu(cpu) == UCODE_ERROR) | 428 | if (microcode_init_cpu(cpu, true) == UCODE_ERROR) |
432 | return -EINVAL; | 429 | return -EINVAL; |
433 | 430 | ||
434 | return err; | 431 | return err; |
@@ -477,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
477 | struct device *dev; | 474 | struct device *dev; |
478 | 475 | ||
479 | dev = get_cpu_device(cpu); | 476 | dev = get_cpu_device(cpu); |
480 | switch (action) { | 477 | |
478 | switch (action & ~CPU_TASKS_FROZEN) { | ||
481 | case CPU_ONLINE: | 479 | case CPU_ONLINE: |
482 | case CPU_ONLINE_FROZEN: | ||
483 | microcode_update_cpu(cpu); | 480 | microcode_update_cpu(cpu); |
484 | case CPU_DOWN_FAILED: | ||
485 | case CPU_DOWN_FAILED_FROZEN: | ||
486 | pr_debug("CPU%d added\n", cpu); | 481 | pr_debug("CPU%d added\n", cpu); |
482 | /* | ||
483 | * "break" is missing on purpose here because we want to fall | ||
484 | * through in order to create the sysfs group. | ||
485 | */ | ||
486 | |||
487 | case CPU_DOWN_FAILED: | ||
487 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) | 488 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) |
488 | pr_err("Failed to create group for CPU%d\n", cpu); | 489 | pr_err("Failed to create group for CPU%d\n", cpu); |
489 | break; | 490 | break; |
491 | |||
490 | case CPU_DOWN_PREPARE: | 492 | case CPU_DOWN_PREPARE: |
491 | case CPU_DOWN_PREPARE_FROZEN: | ||
492 | /* Suspend is in progress, only remove the interface */ | 493 | /* Suspend is in progress, only remove the interface */ |
493 | sysfs_remove_group(&dev->kobj, &mc_attr_group); | 494 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
494 | pr_debug("CPU%d removed\n", cpu); | 495 | pr_debug("CPU%d removed\n", cpu); |
495 | break; | 496 | break; |
496 | 497 | ||
497 | /* | 498 | /* |
499 | * case CPU_DEAD: | ||
500 | * | ||
498 | * When a CPU goes offline, don't free up or invalidate the copy of | 501 | * When a CPU goes offline, don't free up or invalidate the copy of |
499 | * the microcode in kernel memory, so that we can reuse it when the | 502 | * the microcode in kernel memory, so that we can reuse it when the |
500 | * CPU comes back online without unnecessarily requesting the userspace | 503 | * CPU comes back online without unnecessarily requesting the userspace |
501 | * for it again. | 504 | * for it again. |
502 | */ | 505 | */ |
503 | case CPU_UP_CANCELED_FROZEN: | ||
504 | /* The CPU refused to come up during a system resume */ | ||
505 | microcode_fini_cpu(cpu); | ||
506 | break; | ||
507 | } | 506 | } |
507 | |||
508 | /* The CPU refused to come up during a system resume */ | ||
509 | if (action == CPU_UP_CANCELED_FROZEN) | ||
510 | microcode_fini_cpu(cpu); | ||
511 | |||
508 | return NOTIFY_OK; | 512 | return NOTIFY_OK; |
509 | } | 513 | } |
510 | 514 | ||
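Editor's note: the notifier rework above collapses each CPU_*_FROZEN twin into its plain sibling by masking the action once. Isolated into a demo callback (only the masking trick matters; the case bodies are placeholders):

    #include <linux/cpu.h>
    #include <linux/notifier.h>
    #include <linux/printk.h>

    static int demo_cpu_callback(struct notifier_block *nb,
                                 unsigned long action, void *hcpu)
    {
            /*
             * Hotplug ORs CPU_TASKS_FROZEN into 'action' for events raised
             * during suspend/resume; masking it off lets one case label
             * cover both the normal and the _FROZEN variant.
             */
            switch (action & ~CPU_TASKS_FROZEN) {
            case CPU_ONLINE:                /* also CPU_ONLINE_FROZEN */
                    break;
            case CPU_DOWN_PREPARE:          /* also CPU_DOWN_PREPARE_FROZEN */
                    break;
            }

            /* Variants that must stay distinct keep an explicit test: */
            if (action == CPU_UP_CANCELED_FROZEN)
                    pr_debug("resume-only cleanup\n");

            return NOTIFY_OK;
    }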
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0327e2b3c408..3544aed39338 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n) | |||
405 | return 0; | 405 | return 0; |
406 | } | 406 | } |
407 | 407 | ||
408 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) | 408 | static enum ucode_state request_microcode_fw(int cpu, struct device *device, |
409 | bool refresh_fw) | ||
409 | { | 410 | { |
410 | char name[30]; | 411 | char name[30]; |
411 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 412 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f043..a7c5661f8496 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -257,12 +257,14 @@ static int __init msr_init(void) | |||
257 | goto out_chrdev; | 257 | goto out_chrdev; |
258 | } | 258 | } |
259 | msr_class->devnode = msr_devnode; | 259 | msr_class->devnode = msr_devnode; |
260 | get_online_cpus(); | ||
260 | for_each_online_cpu(i) { | 261 | for_each_online_cpu(i) { |
261 | err = msr_device_create(i); | 262 | err = msr_device_create(i); |
262 | if (err != 0) | 263 | if (err != 0) |
263 | goto out_class; | 264 | goto out_class; |
264 | } | 265 | } |
265 | register_hotcpu_notifier(&msr_class_cpu_notifier); | 266 | register_hotcpu_notifier(&msr_class_cpu_notifier); |
267 | put_online_cpus(); | ||
266 | 268 | ||
267 | err = 0; | 269 | err = 0; |
268 | goto out; | 270 | goto out; |
@@ -271,6 +273,7 @@ out_class: | |||
271 | i = 0; | 273 | i = 0; |
272 | for_each_online_cpu(i) | 274 | for_each_online_cpu(i) |
273 | msr_device_destroy(i); | 275 | msr_device_destroy(i); |
276 | put_online_cpus(); | ||
274 | class_destroy(msr_class); | 277 | class_destroy(msr_class); |
275 | out_chrdev: | 278 | out_chrdev: |
276 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); | 279 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
@@ -281,11 +284,13 @@ out: | |||
281 | static void __exit msr_exit(void) | 284 | static void __exit msr_exit(void) |
282 | { | 285 | { |
283 | int cpu = 0; | 286 | int cpu = 0; |
287 | get_online_cpus(); | ||
284 | for_each_online_cpu(cpu) | 288 | for_each_online_cpu(cpu) |
285 | msr_device_destroy(cpu); | 289 | msr_device_destroy(cpu); |
286 | class_destroy(msr_class); | 290 | class_destroy(msr_class); |
287 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); | 291 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
288 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); | 292 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); |
293 | put_online_cpus(); | ||
289 | } | 294 | } |
290 | 295 | ||
291 | module_init(msr_init); | 296 | module_init(msr_init); |
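Editor's note: the msr.c hunks apply the standard fix for the init-vs-hotplug race: pin the set of online CPUs, set up the ones already there, register the notifier for future arrivals, then unpin. Reduced to its shape, with demo names throughout:

    #include <linux/cpu.h>
    #include <linux/init.h>

    static struct notifier_block demo_nb;   /* .notifier_call set elsewhere */

    static int __init demo_init(void)
    {
            int i;

            get_online_cpus();              /* hold hotplug off while we look */
            for_each_online_cpu(i)
                    /* per-CPU setup for every CPU already online */;
            register_hotcpu_notifier(&demo_nb); /* later arrivals -> callback */
            put_online_cpus();

            return 0;
    }

Without the bracketing, a CPU coming online between the loop and the registration would be missed by both paths.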
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..e309cc5c276e --- /dev/null +++ b/arch/x86/kernel/perf_regs.c | |||
@@ -0,0 +1,105 @@ | |||
1 | #include <linux/errno.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include <linux/bug.h> | ||
6 | #include <linux/stddef.h> | ||
7 | #include <asm/perf_regs.h> | ||
8 | #include <asm/ptrace.h> | ||
9 | |||
10 | #ifdef CONFIG_X86_32 | ||
11 | #define PERF_REG_X86_MAX PERF_REG_X86_32_MAX | ||
12 | #else | ||
13 | #define PERF_REG_X86_MAX PERF_REG_X86_64_MAX | ||
14 | #endif | ||
15 | |||
16 | #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) | ||
17 | |||
18 | static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { | ||
19 | PT_REGS_OFFSET(PERF_REG_X86_AX, ax), | ||
20 | PT_REGS_OFFSET(PERF_REG_X86_BX, bx), | ||
21 | PT_REGS_OFFSET(PERF_REG_X86_CX, cx), | ||
22 | PT_REGS_OFFSET(PERF_REG_X86_DX, dx), | ||
23 | PT_REGS_OFFSET(PERF_REG_X86_SI, si), | ||
24 | PT_REGS_OFFSET(PERF_REG_X86_DI, di), | ||
25 | PT_REGS_OFFSET(PERF_REG_X86_BP, bp), | ||
26 | PT_REGS_OFFSET(PERF_REG_X86_SP, sp), | ||
27 | PT_REGS_OFFSET(PERF_REG_X86_IP, ip), | ||
28 | PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), | ||
29 | PT_REGS_OFFSET(PERF_REG_X86_CS, cs), | ||
30 | PT_REGS_OFFSET(PERF_REG_X86_SS, ss), | ||
31 | #ifdef CONFIG_X86_32 | ||
32 | PT_REGS_OFFSET(PERF_REG_X86_DS, ds), | ||
33 | PT_REGS_OFFSET(PERF_REG_X86_ES, es), | ||
34 | PT_REGS_OFFSET(PERF_REG_X86_FS, fs), | ||
35 | PT_REGS_OFFSET(PERF_REG_X86_GS, gs), | ||
36 | #else | ||
37 | /* | ||
38 | * The pt_regs struct does not store | ||
39 | * ds, es, fs, gs in 64 bit mode. | ||
40 | */ | ||
41 | (unsigned int) -1, | ||
42 | (unsigned int) -1, | ||
43 | (unsigned int) -1, | ||
44 | (unsigned int) -1, | ||
45 | #endif | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | PT_REGS_OFFSET(PERF_REG_X86_R8, r8), | ||
48 | PT_REGS_OFFSET(PERF_REG_X86_R9, r9), | ||
49 | PT_REGS_OFFSET(PERF_REG_X86_R10, r10), | ||
50 | PT_REGS_OFFSET(PERF_REG_X86_R11, r11), | ||
51 | PT_REGS_OFFSET(PERF_REG_X86_R12, r12), | ||
52 | PT_REGS_OFFSET(PERF_REG_X86_R13, r13), | ||
53 | PT_REGS_OFFSET(PERF_REG_X86_R14, r14), | ||
54 | PT_REGS_OFFSET(PERF_REG_X86_R15, r15), | ||
55 | #endif | ||
56 | }; | ||
57 | |||
58 | u64 perf_reg_value(struct pt_regs *regs, int idx) | ||
59 | { | ||
60 | if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) | ||
61 | return 0; | ||
62 | |||
63 | return regs_get_register(regs, pt_regs_offset[idx]); | ||
64 | } | ||
65 | |||
66 | #define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) | ||
67 | |||
68 | #ifdef CONFIG_X86_32 | ||
69 | int perf_reg_validate(u64 mask) | ||
70 | { | ||
71 | if (!mask || mask & REG_RESERVED) | ||
72 | return -EINVAL; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | u64 perf_reg_abi(struct task_struct *task) | ||
78 | { | ||
79 | return PERF_SAMPLE_REGS_ABI_32; | ||
80 | } | ||
81 | #else /* CONFIG_X86_64 */ | ||
82 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ | ||
83 | (1ULL << PERF_REG_X86_ES) | \ | ||
84 | (1ULL << PERF_REG_X86_FS) | \ | ||
85 | (1ULL << PERF_REG_X86_GS)) | ||
86 | |||
87 | int perf_reg_validate(u64 mask) | ||
88 | { | ||
89 | if (!mask || mask & REG_RESERVED) | ||
90 | return -EINVAL; | ||
91 | |||
92 | if (mask & REG_NOSUPPORT) | ||
93 | return -EINVAL; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | u64 perf_reg_abi(struct task_struct *task) | ||
99 | { | ||
100 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
101 | return PERF_SAMPLE_REGS_ABI_32; | ||
102 | else | ||
103 | return PERF_SAMPLE_REGS_ABI_64; | ||
104 | } | ||
105 | #endif /* CONFIG_X86_32 */ | ||
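Editor's note: this new file backs the PERF_SAMPLE_REGS_USER sampling added in this cycle: user space selects registers with a bitmask, perf_reg_validate() rejects reserved or unsupported bits (ds/es/fs/gs on 64-bit), and perf_reg_value() reads each selected register out of pt_regs at sample time. A user-space sketch of the request side, assuming the 3.7 uapi headers are installed:

    #include <string.h>
    #include <linux/perf_event.h>
    #include <asm/perf_regs.h>

    /* Sample cycles and capture user IP, SP and BP with every sample. */
    static void demo_fill_attr(struct perf_event_attr *attr)
    {
            memset(attr, 0, sizeof(*attr));
            attr->size = sizeof(*attr);
            attr->type = PERF_TYPE_HARDWARE;
            attr->config = PERF_COUNT_HW_CPU_CYCLES;
            attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_USER;
            attr->sample_regs_user = (1ULL << PERF_REG_X86_IP) |
                                     (1ULL << PERF_REG_X86_SP) |
                                     (1ULL << PERF_REG_X86_BP);
    }

A mask with any REG_RESERVED bit, or a REG_NOSUPPORT bit on 64-bit, makes perf_event_open() fail with -EINVAL via the validate hook above.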
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2069e3..d5f15c3f7b25 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c | |||
@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) | |||
150 | return oprom; | 150 | return oprom; |
151 | } | 151 | } |
152 | 152 | ||
153 | void *pci_map_biosrom(struct pci_dev *pdev) | 153 | void __iomem *pci_map_biosrom(struct pci_dev *pdev) |
154 | { | 154 | { |
155 | struct resource *oprom = find_oprom(pdev); | 155 | struct resource *oprom = find_oprom(pdev); |
156 | 156 | ||
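Editor's note: the probe_roms.c change is a sparse annotation fix: pci_map_biosrom() returns an ioremap-style mapping, so callers must treat it as __iomem memory and use the MMIO accessors rather than plain dereferences. A hypothetical caller (the buffer handling and the unmap signature are assumptions):

    #include <linux/io.h>
    #include <linux/pci.h>

    static void demo_read_rom(struct pci_dev *pdev, void *buf, size_t len)
    {
            void __iomem *rom = pci_map_biosrom(pdev);

            if (!rom)
                    return;
            memcpy_fromio(buf, rom, len);   /* not memcpy(): __iomem source */
            pci_unmap_biosrom(rom);
    }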
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f719..b644e1c765dc 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
66 | { | 66 | { |
67 | int ret; | 67 | int ret; |
68 | 68 | ||
69 | unlazy_fpu(src); | ||
70 | |||
71 | *dst = *src; | 69 | *dst = *src; |
72 | if (fpu_allocated(&src->thread.fpu)) { | 70 | if (fpu_allocated(&src->thread.fpu)) { |
73 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); | 71 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
74 | ret = fpu_alloc(&dst->thread.fpu); | 72 | ret = fpu_alloc(&dst->thread.fpu); |
75 | if (ret) | 73 | if (ret) |
76 | return ret; | 74 | return ret; |
77 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); | 75 | fpu_copy(dst, src); |
78 | } | 76 | } |
79 | return 0; | 77 | return 0; |
80 | } | 78 | } |
@@ -97,16 +95,6 @@ void arch_task_cache_init(void) | |||
97 | SLAB_PANIC | SLAB_NOTRACK, NULL); | 95 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
98 | } | 96 | } |
99 | 97 | ||
100 | static inline void drop_fpu(struct task_struct *tsk) | ||
101 | { | ||
102 | /* | ||
103 | * Forget coprocessor state.. | ||
104 | */ | ||
105 | tsk->fpu_counter = 0; | ||
106 | clear_fpu(tsk); | ||
107 | clear_used_math(); | ||
108 | } | ||
109 | |||
110 | /* | 98 | /* |
111 | * Free current thread data structures etc.. | 99 | * Free current thread data structures etc.. |
112 | */ | 100 | */ |
@@ -163,7 +151,13 @@ void flush_thread(void) | |||
163 | 151 | ||
164 | flush_ptrace_hw_breakpoint(tsk); | 152 | flush_ptrace_hw_breakpoint(tsk); |
165 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 153 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
166 | drop_fpu(tsk); | 154 | drop_init_fpu(tsk); |
155 | /* | ||
156 | * Free the FPU state for non-xsave platforms. They get reallocated | ||
157 | * lazily at the first use. | ||
158 | */ | ||
159 | if (!use_eager_fpu()) | ||
160 | free_thread_xstate(tsk); | ||
167 | } | 161 | } |
168 | 162 | ||
169 | static void hard_disable_TSC(void) | 163 | static void hard_disable_TSC(void) |
@@ -299,71 +293,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, | |||
299 | } | 293 | } |
300 | 294 | ||
301 | /* | 295 | /* |
302 | * This gets run with %si containing the | ||
303 | * function to call, and %di containing | ||
304 | * the "args". | ||
305 | */ | ||
306 | extern void kernel_thread_helper(void); | ||
307 | |||
308 | /* | ||
309 | * Create a kernel thread | ||
310 | */ | ||
311 | int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
312 | { | ||
313 | struct pt_regs regs; | ||
314 | |||
315 | memset(®s, 0, sizeof(regs)); | ||
316 | |||
317 | regs.si = (unsigned long) fn; | ||
318 | regs.di = (unsigned long) arg; | ||
319 | |||
320 | #ifdef CONFIG_X86_32 | ||
321 | regs.ds = __USER_DS; | ||
322 | regs.es = __USER_DS; | ||
323 | regs.fs = __KERNEL_PERCPU; | ||
324 | regs.gs = __KERNEL_STACK_CANARY; | ||
325 | #else | ||
326 | regs.ss = __KERNEL_DS; | ||
327 | #endif | ||
328 | |||
329 | regs.orig_ax = -1; | ||
330 | regs.ip = (unsigned long) kernel_thread_helper; | ||
331 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | ||
332 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; | ||
333 | |||
334 | /* Ok, create the new process.. */ | ||
335 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | ||
336 | } | ||
337 | EXPORT_SYMBOL(kernel_thread); | ||
338 | |||
339 | /* | ||
340 | * sys_execve() executes a new program. | ||
341 | */ | ||
342 | long sys_execve(const char __user *name, | ||
343 | const char __user *const __user *argv, | ||
344 | const char __user *const __user *envp, struct pt_regs *regs) | ||
345 | { | ||
346 | long error; | ||
347 | char *filename; | ||
348 | |||
349 | filename = getname(name); | ||
350 | error = PTR_ERR(filename); | ||
351 | if (IS_ERR(filename)) | ||
352 | return error; | ||
353 | error = do_execve(filename, argv, envp, regs); | ||
354 | |||
355 | #ifdef CONFIG_X86_32 | ||
356 | if (error == 0) { | ||
357 | /* Make sure we don't return using sysenter.. */ | ||
358 | set_thread_flag(TIF_IRET); | ||
359 | } | ||
360 | #endif | ||
361 | |||
362 | putname(filename); | ||
363 | return error; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * Idle related variables and functions | 296 | * Idle related variables and functions |
368 | */ | 297 | */ |
369 | unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; | 298 | unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; |
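Editor's note: kernel_thread() and sys_execve() can leave the arch file because x86 now opts into the generic implementations from this cycle's GENERIC_KERNEL_THREAD work; copy_thread() grows a kernel-thread special case instead (see the process_32.c and process_64.c hunks below). From memory, the generic caller looks roughly like this, a sketch rather than a quotation of kernel/fork.c:

    #include <linux/sched.h>
    #include <linux/types.h>

    pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
    {
            /*
             * regs == NULL marks a kernel thread; copy_thread() then parks
             * fn in bx and arg in bp for ret_from_kernel_thread.
             */
            return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
                           (unsigned long)fn, NULL, (unsigned long)arg,
                           NULL, NULL);
    }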
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121b..44e0bff38e72 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <asm/switch_to.h> | 57 | #include <asm/switch_to.h> |
58 | 58 | ||
59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
60 | asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); | ||
60 | 61 | ||
61 | /* | 62 | /* |
62 | * Return saved PC of a blocked thread. | 63 | * Return saved PC of a blocked thread. |
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task) | |||
127 | } | 128 | } |
128 | 129 | ||
129 | int copy_thread(unsigned long clone_flags, unsigned long sp, | 130 | int copy_thread(unsigned long clone_flags, unsigned long sp, |
130 | unsigned long unused, | 131 | unsigned long arg, |
131 | struct task_struct *p, struct pt_regs *regs) | 132 | struct task_struct *p, struct pt_regs *regs) |
132 | { | 133 | { |
133 | struct pt_regs *childregs; | 134 | struct pt_regs *childregs = task_pt_regs(p); |
134 | struct task_struct *tsk; | 135 | struct task_struct *tsk; |
135 | int err; | 136 | int err; |
136 | 137 | ||
137 | childregs = task_pt_regs(p); | 138 | p->thread.sp = (unsigned long) childregs; |
139 | p->thread.sp0 = (unsigned long) (childregs+1); | ||
140 | |||
141 | if (unlikely(!regs)) { | ||
142 | /* kernel thread */ | ||
143 | memset(childregs, 0, sizeof(struct pt_regs)); | ||
144 | p->thread.ip = (unsigned long) ret_from_kernel_thread; | ||
145 | task_user_gs(p) = __KERNEL_STACK_CANARY; | ||
146 | childregs->ds = __USER_DS; | ||
147 | childregs->es = __USER_DS; | ||
148 | childregs->fs = __KERNEL_PERCPU; | ||
149 | childregs->bx = sp; /* function */ | ||
150 | childregs->bp = arg; | ||
151 | childregs->orig_ax = -1; | ||
152 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
153 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; | ||
154 | p->fpu_counter = 0; | ||
155 | p->thread.io_bitmap_ptr = NULL; | ||
156 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
157 | return 0; | ||
158 | } | ||
138 | *childregs = *regs; | 159 | *childregs = *regs; |
139 | childregs->ax = 0; | 160 | childregs->ax = 0; |
140 | childregs->sp = sp; | 161 | childregs->sp = sp; |
141 | 162 | ||
142 | p->thread.sp = (unsigned long) childregs; | ||
143 | p->thread.sp0 = (unsigned long) (childregs+1); | ||
144 | |||
145 | p->thread.ip = (unsigned long) ret_from_fork; | 163 | p->thread.ip = (unsigned long) ret_from_fork; |
146 | |||
147 | task_user_gs(p) = get_user_gs(regs); | 164 | task_user_gs(p) = get_user_gs(regs); |
148 | 165 | ||
149 | p->fpu_counter = 0; | 166 | p->fpu_counter = 0; |
@@ -190,10 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
190 | regs->cs = __USER_CS; | 207 | regs->cs = __USER_CS; |
191 | regs->ip = new_ip; | 208 | regs->ip = new_ip; |
192 | regs->sp = new_sp; | 209 | regs->sp = new_sp; |
210 | regs->flags = X86_EFLAGS_IF; | ||
193 | /* | 211 | /* |
194 | * Free the old FP and other extended state | 212 | * force it to the iret return path by making it look as if there was |
213 | * some work pending. | ||
195 | */ | 214 | */ |
196 | free_thread_xstate(current); | 215 | set_thread_flag(TIF_NOTIFY_RESUME); |
197 | } | 216 | } |
198 | EXPORT_SYMBOL_GPL(start_thread); | 217 | EXPORT_SYMBOL_GPL(start_thread); |
199 | 218 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9d7cb8..16c6365e2b86 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls) | |||
146 | } | 146 | } |
147 | 147 | ||
148 | int copy_thread(unsigned long clone_flags, unsigned long sp, | 148 | int copy_thread(unsigned long clone_flags, unsigned long sp, |
149 | unsigned long unused, | 149 | unsigned long arg, |
150 | struct task_struct *p, struct pt_regs *regs) | 150 | struct task_struct *p, struct pt_regs *regs) |
151 | { | 151 | { |
152 | int err; | 152 | int err; |
153 | struct pt_regs *childregs; | 153 | struct pt_regs *childregs; |
154 | struct task_struct *me = current; | 154 | struct task_struct *me = current; |
155 | 155 | ||
156 | childregs = ((struct pt_regs *) | 156 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; |
157 | (THREAD_SIZE + task_stack_page(p))) - 1; | 157 | childregs = task_pt_regs(p); |
158 | *childregs = *regs; | ||
159 | |||
160 | childregs->ax = 0; | ||
161 | if (user_mode(regs)) | ||
162 | childregs->sp = sp; | ||
163 | else | ||
164 | childregs->sp = (unsigned long)childregs; | ||
165 | |||
166 | p->thread.sp = (unsigned long) childregs; | 158 | p->thread.sp = (unsigned long) childregs; |
167 | p->thread.sp0 = (unsigned long) (childregs+1); | ||
168 | p->thread.usersp = me->thread.usersp; | 159 | p->thread.usersp = me->thread.usersp; |
169 | |||
170 | set_tsk_thread_flag(p, TIF_FORK); | 160 | set_tsk_thread_flag(p, TIF_FORK); |
171 | |||
172 | p->fpu_counter = 0; | 161 | p->fpu_counter = 0; |
173 | p->thread.io_bitmap_ptr = NULL; | 162 | p->thread.io_bitmap_ptr = NULL; |
174 | 163 | ||
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
178 | p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; | 167 | p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; |
179 | savesegment(es, p->thread.es); | 168 | savesegment(es, p->thread.es); |
180 | savesegment(ds, p->thread.ds); | 169 | savesegment(ds, p->thread.ds); |
170 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
171 | |||
172 | if (unlikely(!regs)) { | ||
173 | /* kernel thread */ | ||
174 | memset(childregs, 0, sizeof(struct pt_regs)); | ||
175 | childregs->sp = (unsigned long)childregs; | ||
176 | childregs->ss = __KERNEL_DS; | ||
177 | childregs->bx = sp; /* function */ | ||
178 | childregs->bp = arg; | ||
179 | childregs->orig_ax = -1; | ||
180 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
181 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; | ||
182 | return 0; | ||
183 | } | ||
184 | *childregs = *regs; | ||
185 | |||
186 | childregs->ax = 0; | ||
187 | childregs->sp = sp; | ||
181 | 188 | ||
182 | err = -ENOMEM; | 189 | err = -ENOMEM; |
183 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 190 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
@@ -232,10 +239,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, | |||
232 | regs->cs = _cs; | 239 | regs->cs = _cs; |
233 | regs->ss = _ss; | 240 | regs->ss = _ss; |
234 | regs->flags = X86_EFLAGS_IF; | 241 | regs->flags = X86_EFLAGS_IF; |
235 | /* | ||
236 | * Free the old FP and other extended state | ||
237 | */ | ||
238 | free_thread_xstate(current); | ||
239 | } | 242 | } |
240 | 243 | ||
241 | void | 244 | void |
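Editor's note: both copy_thread() variants stash the thread function in childregs->bx and its argument in childregs->bp when regs is NULL. The matching ret_from_kernel_thread stub is a few lines of entry-code assembly; rendered approximately in C it amounts to:

    #include <asm/ptrace.h>

    /* Approximate C for the assembly stub, not the literal entry code. */
    static void ret_from_kernel_thread_in_c(struct pt_regs *childregs)
    {
            int (*fn)(void *) = (int (*)(void *))childregs->bx;
            void *arg = (void *)childregs->bp;

            fn(arg);        /* run the kernel thread's body */
            /*
             * If fn() returns, the real stub falls through to the normal
             * return-to-user path: that is how a kernel thread that has
             * exec'ed a user program makes its first trip to user mode.
             */
    }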
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0f..b00b33a18390 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/signal.h> | 21 | #include <linux/signal.h> |
22 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
23 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
24 | #include <linux/rcupdate.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { | |||
1332 | #define genregs32_get genregs_get | 1333 | #define genregs32_get genregs_get |
1333 | #define genregs32_set genregs_set | 1334 | #define genregs32_set genregs_set |
1334 | 1335 | ||
1335 | #define user_i387_ia32_struct user_i387_struct | ||
1336 | #define user32_fxsr_struct user_fxsr_struct | ||
1337 | |||
1338 | #endif /* CONFIG_X86_64 */ | 1336 | #endif /* CONFIG_X86_64 */ |
1339 | 1337 | ||
1340 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1338 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1463 | { | 1461 | { |
1464 | long ret = 0; | 1462 | long ret = 0; |
1465 | 1463 | ||
1464 | rcu_user_exit(); | ||
1465 | |||
1466 | /* | 1466 | /* |
1467 | * If we stepped into a sysenter/syscall insn, it trapped in | 1467 | * If we stepped into a sysenter/syscall insn, it trapped in |
1468 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | 1468 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. |
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1526 | !test_thread_flag(TIF_SYSCALL_EMU); | 1526 | !test_thread_flag(TIF_SYSCALL_EMU); |
1527 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) | 1527 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) |
1528 | tracehook_report_syscall_exit(regs, step); | 1528 | tracehook_report_syscall_exit(regs, step); |
1529 | |||
1530 | rcu_user_enter(); | ||
1529 | } | 1531 | } |
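Editor's note: the rcu_user_exit()/rcu_user_enter() pair belongs to the new RCU user-mode extended quiescent state (CONFIG_RCU_USER_QS): while a task executes in user space, RCU may treat its CPU as idle, so any entry path that might take rcu_read_lock() must announce the kernel entry first and re-enter the quiescent state on the way back out. The required bracketing, shown on a demo hook rather than a real entry path:

    #include <linux/rcupdate.h>

    void demo_kernel_entry(void)
    {
            rcu_user_exit();        /* leave the user EQS: RCU watches us */

            /* ... work that may use rcu_read_lock()/rcu_read_unlock() ... */

            rcu_user_enter();       /* heading back to user mode: re-enter EQS */
    }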
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index af6db6ec5b2a..4929c1be0ac0 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -225,7 +225,7 @@ static struct platform_device rtc_device = { | |||
225 | static __init int add_rtc_cmos(void) | 225 | static __init int add_rtc_cmos(void) |
226 | { | 226 | { |
227 | #ifdef CONFIG_PNP | 227 | #ifdef CONFIG_PNP |
228 | static const char *ids[] __initconst = | 228 | static const char * const ids[] __initconst = |
229 | { "PNP0b00", "PNP0b01", "PNP0b02", }; | 229 | { "PNP0b00", "PNP0b01", "PNP0b02", }; |
230 | struct pnp_dev *dev; | 230 | struct pnp_dev *dev; |
231 | struct pnp_id *id; | 231 | struct pnp_id *id; |
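Editor's note: the rtc.c one-liner is const-correctness: data placed in __initconst must be entirely read-only, which requires const on the array of pointers as well as on the strings they point to. The difference in two declarations:

    const char *a[]        = { "x" };   /* strings const, pointers writable */
    const char * const b[] = { "x" };   /* both const: suitable for __initconst */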
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4f165479c453..a2bb18e02839 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -68,6 +68,7 @@ | |||
68 | #include <linux/percpu.h> | 68 | #include <linux/percpu.h> |
69 | #include <linux/crash_dump.h> | 69 | #include <linux/crash_dump.h> |
70 | #include <linux/tboot.h> | 70 | #include <linux/tboot.h> |
71 | #include <linux/jiffies.h> | ||
71 | 72 | ||
72 | #include <video/edid.h> | 73 | #include <video/edid.h> |
73 | 74 | ||
@@ -957,7 +958,7 @@ void __init setup_arch(char **cmdline_p) | |||
957 | initmem_init(); | 958 | initmem_init(); |
958 | memblock_find_dma_reserve(); | 959 | memblock_find_dma_reserve(); |
959 | 960 | ||
960 | #ifdef CONFIG_KVM_CLOCK | 961 | #ifdef CONFIG_KVM_GUEST |
961 | kvmclock_init(); | 962 | kvmclock_init(); |
962 | #endif | 963 | #endif |
963 | 964 | ||
@@ -1032,6 +1033,8 @@ void __init setup_arch(char **cmdline_p) | |||
1032 | mcheck_init(); | 1033 | mcheck_init(); |
1033 | 1034 | ||
1034 | arch_init_ideal_nops(); | 1035 | arch_init_ideal_nops(); |
1036 | |||
1037 | register_refined_jiffies(CLOCK_TICK_RATE); | ||
1035 | } | 1038 | } |
1036 | 1039 | ||
1037 | #ifdef CONFIG_X86_32 | 1040 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376e..29ad351804e9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
114 | regs->orig_ax = -1; /* disable syscall checks */ | 114 | regs->orig_ax = -1; /* disable syscall checks */ |
115 | 115 | ||
116 | get_user_ex(buf, &sc->fpstate); | 116 | get_user_ex(buf, &sc->fpstate); |
117 | err |= restore_i387_xstate(buf); | ||
118 | 117 | ||
119 | get_user_ex(*pax, &sc->ax); | 118 | get_user_ex(*pax, &sc->ax); |
120 | } get_user_catch(err); | 119 | } get_user_catch(err); |
121 | 120 | ||
121 | err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); | ||
122 | |||
122 | return err; | 123 | return err; |
123 | } | 124 | } |
124 | 125 | ||
@@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
206 | void __user **fpstate) | 207 | void __user **fpstate) |
207 | { | 208 | { |
208 | /* Default to using normal stack */ | 209 | /* Default to using normal stack */ |
210 | unsigned long math_size = 0; | ||
209 | unsigned long sp = regs->sp; | 211 | unsigned long sp = regs->sp; |
212 | unsigned long buf_fx = 0; | ||
210 | int onsigstack = on_sig_stack(sp); | 213 | int onsigstack = on_sig_stack(sp); |
211 | 214 | ||
212 | #ifdef CONFIG_X86_64 | ||
213 | /* redzone */ | 215 | /* redzone */ |
214 | sp -= 128; | 216 | if (config_enabled(CONFIG_X86_64)) |
215 | #endif /* CONFIG_X86_64 */ | 217 | sp -= 128; |
216 | 218 | ||
217 | if (!onsigstack) { | 219 | if (!onsigstack) { |
218 | /* This is the X/Open sanctioned signal stack switching. */ | 220 | /* This is the X/Open sanctioned signal stack switching. */ |
219 | if (ka->sa.sa_flags & SA_ONSTACK) { | 221 | if (ka->sa.sa_flags & SA_ONSTACK) { |
220 | if (current->sas_ss_size) | 222 | if (current->sas_ss_size) |
221 | sp = current->sas_ss_sp + current->sas_ss_size; | 223 | sp = current->sas_ss_sp + current->sas_ss_size; |
222 | } else { | 224 | } else if (config_enabled(CONFIG_X86_32) && |
223 | #ifdef CONFIG_X86_32 | 225 | (regs->ss & 0xffff) != __USER_DS && |
224 | /* This is the legacy signal stack switching. */ | 226 | !(ka->sa.sa_flags & SA_RESTORER) && |
225 | if ((regs->ss & 0xffff) != __USER_DS && | 227 | ka->sa.sa_restorer) { |
226 | !(ka->sa.sa_flags & SA_RESTORER) && | 228 | /* This is the legacy signal stack switching. */ |
227 | ka->sa.sa_restorer) | ||
228 | sp = (unsigned long) ka->sa.sa_restorer; | 229 | sp = (unsigned long) ka->sa.sa_restorer; |
229 | #endif /* CONFIG_X86_32 */ | ||
230 | } | 230 | } |
231 | } | 231 | } |
232 | 232 | ||
233 | if (used_math()) { | 233 | if (used_math()) { |
234 | sp -= sig_xstate_size; | 234 | sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), |
235 | #ifdef CONFIG_X86_64 | 235 | &buf_fx, &math_size); |
236 | sp = round_down(sp, 64); | ||
237 | #endif /* CONFIG_X86_64 */ | ||
238 | *fpstate = (void __user *)sp; | 236 | *fpstate = (void __user *)sp; |
239 | } | 237 | } |
240 | 238 | ||
@@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
247 | if (onsigstack && !likely(on_sig_stack(sp))) | 245 | if (onsigstack && !likely(on_sig_stack(sp))) |
248 | return (void __user *)-1L; | 246 | return (void __user *)-1L; |
249 | 247 | ||
250 | /* save i387 state */ | 248 | /* save i387 and extended state */ |
251 | if (used_math() && save_i387_xstate(*fpstate) < 0) | 249 | if (used_math() && |
250 | save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) | ||
252 | return (void __user *)-1L; | 251 | return (void __user *)-1L; |
253 | 252 | ||
254 | return (void __user *)sp; | 253 | return (void __user *)sp; |
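Editor's note: the get_sigframe() rework above swaps #ifdef blocks for config_enabled(), which expands to a constant 0 or 1: dead branches are still parsed and type-checked on every build, then folded away by the compiler. The idiom on its own (demo function; the 128-byte constant is the x86-64 ABI red zone, as in the hunk above):

    #include <linux/kconfig.h>

    static unsigned long demo_adjust_sp(unsigned long sp)
    {
            /* On 32-bit builds this branch compiles away entirely. */
            if (config_enabled(CONFIG_X86_64))
                    sp -= 128;      /* skip the red zone */

            return sp;
    }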
@@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
357 | put_user_ex(sig, &frame->sig); | 356 | put_user_ex(sig, &frame->sig); |
358 | put_user_ex(&frame->info, &frame->pinfo); | 357 | put_user_ex(&frame->info, &frame->pinfo); |
359 | put_user_ex(&frame->uc, &frame->puc); | 358 | put_user_ex(&frame->uc, &frame->puc); |
360 | err |= copy_siginfo_to_user(&frame->info, info); | ||
361 | 359 | ||
362 | /* Create the ucontext. */ | 360 | /* Create the ucontext. */ |
363 | if (cpu_has_xsave) | 361 | if (cpu_has_xsave) |
@@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
369 | put_user_ex(sas_ss_flags(regs->sp), | 367 | put_user_ex(sas_ss_flags(regs->sp), |
370 | &frame->uc.uc_stack.ss_flags); | 368 | &frame->uc.uc_stack.ss_flags); |
371 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | 369 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); |
372 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
373 | regs, set->sig[0]); | ||
374 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
375 | 370 | ||
376 | /* Set up to return from userspace. */ | 371 | /* Set up to return from userspace. */ |
377 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 372 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
@@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
388 | */ | 383 | */ |
389 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); | 384 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
390 | } put_user_catch(err); | 385 | } put_user_catch(err); |
386 | |||
387 | err |= copy_siginfo_to_user(&frame->info, info); | ||
388 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
389 | regs, set->sig[0]); | ||
390 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
391 | 391 | ||
392 | if (err) | 392 | if (err) |
393 | return -EFAULT; | 393 | return -EFAULT; |
@@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
436 | put_user_ex(sas_ss_flags(regs->sp), | 436 | put_user_ex(sas_ss_flags(regs->sp), |
437 | &frame->uc.uc_stack.ss_flags); | 437 | &frame->uc.uc_stack.ss_flags); |
438 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 438 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
439 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
440 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
441 | 439 | ||
442 | /* Set up to return from userspace. If provided, use a stub | 440 | /* Set up to return from userspace. If provided, use a stub |
443 | already in userspace. */ | 441 | already in userspace. */ |
@@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
450 | } | 448 | } |
451 | } put_user_catch(err); | 449 | } put_user_catch(err); |
452 | 450 | ||
451 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
452 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
453 | |||
453 | if (err) | 454 | if (err) |
454 | return -EFAULT; | 455 | return -EFAULT; |
455 | 456 | ||
@@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
474 | } | 475 | } |
475 | #endif /* CONFIG_X86_32 */ | 476 | #endif /* CONFIG_X86_32 */ |
476 | 477 | ||
478 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
479 | siginfo_t *info, compat_sigset_t *set, | ||
480 | struct pt_regs *regs) | ||
481 | { | ||
482 | #ifdef CONFIG_X86_X32_ABI | ||
483 | struct rt_sigframe_x32 __user *frame; | ||
484 | void __user *restorer; | ||
485 | int err = 0; | ||
486 | void __user *fpstate = NULL; | ||
487 | |||
488 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | ||
489 | |||
490 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
491 | return -EFAULT; | ||
492 | |||
493 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
494 | if (copy_siginfo_to_user32(&frame->info, info)) | ||
495 | return -EFAULT; | ||
496 | } | ||
497 | |||
498 | put_user_try { | ||
499 | /* Create the ucontext. */ | ||
500 | if (cpu_has_xsave) | ||
501 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
502 | else | ||
503 | put_user_ex(0, &frame->uc.uc_flags); | ||
504 | put_user_ex(0, &frame->uc.uc_link); | ||
505 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
506 | put_user_ex(sas_ss_flags(regs->sp), | ||
507 | &frame->uc.uc_stack.ss_flags); | ||
508 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
509 | put_user_ex(0, &frame->uc.uc__pad0); | ||
510 | |||
511 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
512 | restorer = ka->sa.sa_restorer; | ||
513 | } else { | ||
514 | /* could use a vstub here */ | ||
515 | restorer = NULL; | ||
516 | err |= -EFAULT; | ||
517 | } | ||
518 | put_user_ex(restorer, &frame->pretcode); | ||
519 | } put_user_catch(err); | ||
520 | |||
521 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
522 | regs, set->sig[0]); | ||
523 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
524 | |||
525 | if (err) | ||
526 | return -EFAULT; | ||
527 | |||
528 | /* Set up registers for signal handler */ | ||
529 | regs->sp = (unsigned long) frame; | ||
530 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
531 | |||
532 | /* We use the x32 calling convention here... */ | ||
533 | regs->di = sig; | ||
534 | regs->si = (unsigned long) &frame->info; | ||
535 | regs->dx = (unsigned long) &frame->uc; | ||
536 | |||
537 | loadsegment(ds, __USER_DS); | ||
538 | loadsegment(es, __USER_DS); | ||
539 | |||
540 | regs->cs = __USER_CS; | ||
541 | regs->ss = __USER_DS; | ||
542 | #endif /* CONFIG_X86_X32_ABI */ | ||
543 | |||
544 | return 0; | ||
545 | } | ||
546 | |||
477 | #ifdef CONFIG_X86_32 | 547 | #ifdef CONFIG_X86_32 |
478 | /* | 548 | /* |
479 | * Atomically swap in the new signal mask, and wait for a signal. | 549 | * Atomically swap in the new signal mask, and wait for a signal. |
@@ -612,55 +682,22 @@ static int signr_convert(int sig) | |||
612 | return sig; | 682 | return sig; |
613 | } | 683 | } |
614 | 684 | ||
615 | #ifdef CONFIG_X86_32 | ||
616 | |||
617 | #define is_ia32 1 | ||
618 | #define ia32_setup_frame __setup_frame | ||
619 | #define ia32_setup_rt_frame __setup_rt_frame | ||
620 | |||
621 | #else /* !CONFIG_X86_32 */ | ||
622 | |||
623 | #ifdef CONFIG_IA32_EMULATION | ||
624 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
625 | #else /* !CONFIG_IA32_EMULATION */ | ||
626 | #define is_ia32 0 | ||
627 | #endif /* CONFIG_IA32_EMULATION */ | ||
628 | |||
629 | #ifdef CONFIG_X86_X32_ABI | ||
630 | #define is_x32 test_thread_flag(TIF_X32) | ||
631 | |||
632 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
633 | siginfo_t *info, compat_sigset_t *set, | ||
634 | struct pt_regs *regs); | ||
635 | #else /* !CONFIG_X86_X32_ABI */ | ||
636 | #define is_x32 0 | ||
637 | #endif /* CONFIG_X86_X32_ABI */ | ||
638 | |||
639 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
640 | sigset_t *set, struct pt_regs *regs); | ||
641 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
642 | sigset_t *set, struct pt_regs *regs); | ||
643 | |||
644 | #endif /* CONFIG_X86_32 */ | ||
645 | |||
646 | static int | 685 | static int |
647 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 686 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
648 | struct pt_regs *regs) | 687 | struct pt_regs *regs) |
649 | { | 688 | { |
650 | int usig = signr_convert(sig); | 689 | int usig = signr_convert(sig); |
651 | sigset_t *set = sigmask_to_save(); | 690 | sigset_t *set = sigmask_to_save(); |
691 | compat_sigset_t *cset = (compat_sigset_t *) set; | ||
652 | 692 | ||
653 | /* Set up the stack frame */ | 693 | /* Set up the stack frame */ |
654 | if (is_ia32) { | 694 | if (is_ia32_frame()) { |
655 | if (ka->sa.sa_flags & SA_SIGINFO) | 695 | if (ka->sa.sa_flags & SA_SIGINFO) |
656 | return ia32_setup_rt_frame(usig, ka, info, set, regs); | 696 | return ia32_setup_rt_frame(usig, ka, info, cset, regs); |
657 | else | 697 | else |
658 | return ia32_setup_frame(usig, ka, set, regs); | 698 | return ia32_setup_frame(usig, ka, cset, regs); |
659 | #ifdef CONFIG_X86_X32_ABI | 699 | } else if (is_x32_frame()) { |
660 | } else if (is_x32) { | 700 | return x32_setup_rt_frame(usig, ka, info, cset, regs); |
661 | return x32_setup_rt_frame(usig, ka, info, | ||
662 | (compat_sigset_t *)set, regs); | ||
663 | #endif | ||
664 | } else { | 701 | } else { |
665 | return __setup_rt_frame(sig, ka, info, set, regs); | 702 | return __setup_rt_frame(sig, ka, info, set, regs); |
666 | } | 703 | } |
@@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs) | |||
779 | void | 816 | void |
780 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 817 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
781 | { | 818 | { |
819 | rcu_user_exit(); | ||
820 | |||
782 | #ifdef CONFIG_X86_MCE | 821 | #ifdef CONFIG_X86_MCE |
783 | /* notify userspace of pending MCEs */ | 822 | /* notify userspace of pending MCEs */ |
784 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 823 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
@@ -801,9 +840,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
801 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | 840 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) |
802 | fire_user_return_notifiers(); | 841 | fire_user_return_notifiers(); |
803 | 842 | ||
804 | #ifdef CONFIG_X86_32 | 843 | rcu_user_enter(); |
805 | clear_thread_flag(TIF_IRET); | ||
806 | #endif /* CONFIG_X86_32 */ | ||
807 | } | 844 | } |
808 | 845 | ||
809 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 846 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
@@ -824,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
824 | } | 861 | } |
825 | 862 | ||
826 | #ifdef CONFIG_X86_X32_ABI | 863 | #ifdef CONFIG_X86_X32_ABI |
827 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
828 | siginfo_t *info, compat_sigset_t *set, | ||
829 | struct pt_regs *regs) | ||
830 | { | ||
831 | struct rt_sigframe_x32 __user *frame; | ||
832 | void __user *restorer; | ||
833 | int err = 0; | ||
834 | void __user *fpstate = NULL; | ||
835 | |||
836 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | ||
837 | |||
838 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
839 | return -EFAULT; | ||
840 | |||
841 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
842 | if (copy_siginfo_to_user32(&frame->info, info)) | ||
843 | return -EFAULT; | ||
844 | } | ||
845 | |||
846 | put_user_try { | ||
847 | /* Create the ucontext. */ | ||
848 | if (cpu_has_xsave) | ||
849 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
850 | else | ||
851 | put_user_ex(0, &frame->uc.uc_flags); | ||
852 | put_user_ex(0, &frame->uc.uc_link); | ||
853 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
854 | put_user_ex(sas_ss_flags(regs->sp), | ||
855 | &frame->uc.uc_stack.ss_flags); | ||
856 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
857 | put_user_ex(0, &frame->uc.uc__pad0); | ||
858 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
859 | regs, set->sig[0]); | ||
860 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
861 | |||
862 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
863 | restorer = ka->sa.sa_restorer; | ||
864 | } else { | ||
865 | /* could use a vstub here */ | ||
866 | restorer = NULL; | ||
867 | err |= -EFAULT; | ||
868 | } | ||
869 | put_user_ex(restorer, &frame->pretcode); | ||
870 | } put_user_catch(err); | ||
871 | |||
872 | if (err) | ||
873 | return -EFAULT; | ||
874 | |||
875 | /* Set up registers for signal handler */ | ||
876 | regs->sp = (unsigned long) frame; | ||
877 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
878 | |||
879 | /* We use the x32 calling convention here... */ | ||
880 | regs->di = sig; | ||
881 | regs->si = (unsigned long) &frame->info; | ||
882 | regs->dx = (unsigned long) &frame->uc; | ||
883 | |||
884 | loadsegment(ds, __USER_DS); | ||
885 | loadsegment(es, __USER_DS); | ||
886 | |||
887 | regs->cs = __USER_CS; | ||
888 | regs->ss = __USER_DS; | ||
889 | |||
890 | return 0; | ||
891 | } | ||
892 | |||
893 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | 864 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) |
894 | { | 865 | { |
895 | struct rt_sigframe_x32 __user *frame; | 866 | struct rt_sigframe_x32 __user *frame; |
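Editor's note: a pattern repeats across the signal.c hunks: copy_siginfo_to_user(), setup_sigcontext() and __copy_to_user() move out of the put_user_try { } put_user_catch() sections. The try/catch region arms a pending-fault mechanism meant only for the unchecked put_user_ex() stores, so function calls that do their own fault handling belong outside it. The shape of the corrected idiom, with a hypothetical frame layout standing in for the arch-private sigframe:

    #include <linux/signal.h>
    #include <asm/uaccess.h>

    static int demo_setup_frame(struct rt_sigframe __user *frame,
                                siginfo_t *info, int sig)
    {
            int err = 0;

            put_user_try {
                    /* unchecked stores; a fault lands in 'err' at the catch */
                    put_user_ex(sig, &frame->sig);
                    put_user_ex(&frame->info, &frame->pinfo);
            } put_user_catch(err);

            /* checked helpers handle their own faults: keep them out here */
            err |= copy_siginfo_to_user(&frame->info, info);

            return err ? -EFAULT : 0;
    }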
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c02..c80a33bc528b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
665 | unsigned long boot_error = 0; | 665 | unsigned long boot_error = 0; |
666 | int timeout; | 666 | int timeout; |
667 | 667 | ||
668 | alternatives_smp_switch(1); | 668 | /* Just in case we booted with a single CPU. */ |
669 | alternatives_enable_smp(); | ||
669 | 670 | ||
670 | idle->thread.sp = (unsigned long) (((struct pt_regs *) | 671 | idle->thread.sp = (unsigned long) (((struct pt_regs *) |
671 | (THREAD_SIZE + task_stack_page(idle))) - 1); | 672 | (THREAD_SIZE + task_stack_page(idle))) - 1); |
@@ -1053,20 +1054,6 @@ out: | |||
1053 | preempt_enable(); | 1054 | preempt_enable(); |
1054 | } | 1055 | } |
1055 | 1056 | ||
1056 | void arch_disable_nonboot_cpus_begin(void) | ||
1057 | { | ||
1058 | /* | ||
1059 | * Avoid the smp alternatives switch during the disable_nonboot_cpus(). | ||
1060 | * In the suspend path, we will be back in the SMP mode shortly anyways. | ||
1061 | */ | ||
1062 | skip_smp_alternatives = true; | ||
1063 | } | ||
1064 | |||
1065 | void arch_disable_nonboot_cpus_end(void) | ||
1066 | { | ||
1067 | skip_smp_alternatives = false; | ||
1068 | } | ||
1069 | |||
1070 | void arch_enable_nonboot_cpus_begin(void) | 1057 | void arch_enable_nonboot_cpus_begin(void) |
1071 | { | 1058 | { |
1072 | set_mtrr_aps_delayed_init(); | 1059 | set_mtrr_aps_delayed_init(); |
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) | |||
1256 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1243 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1257 | if (system_state == SYSTEM_RUNNING) | 1244 | if (system_state == SYSTEM_RUNNING) |
1258 | pr_info("CPU %u is now offline\n", cpu); | 1245 | pr_info("CPU %u is now offline\n", cpu); |
1259 | |||
1260 | if (1 == num_online_cpus()) | ||
1261 | alternatives_smp_switch(0); | ||
1262 | return; | 1246 | return; |
1263 | } | 1247 | } |
1264 | msleep(100); | 1248 | msleep(100); |
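Taken together, the three smpboot.c hunks above make SMP alternatives patching a one-way switch: do_boot_cpu() calls the new alternatives_enable_smp() (a no-op once SMP code is in place), the suspend-path skip_smp_alternatives hooks go away, and native_cpu_die() no longer patches back to UP code when the last other CPU goes offline. A minimal sketch of the latch this implies; the patching helper name is hypothetical, not quoted from alternative.c:

static bool uniproc_patched = true;     /* assumption: boot starts with UP code */

void alternatives_enable_smp(void)
{
        if (!uniproc_patched)
                return;                 /* already SMP-patched: nothing to do */
        patch_smp_alternatives();       /* hypothetical helper doing the rewrite */
        uniproc_patched = false;        /* one-way: never switch back to UP */
}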
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d1161488..cd3b2438a980 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) | |||
157 | return 1; | 157 | return 1; |
158 | } | 158 | } |
159 | 159 | ||
160 | void set_task_blockstep(struct task_struct *task, bool on) | ||
161 | { | ||
162 | unsigned long debugctl; | ||
163 | |||
164 | /* | ||
165 | * Ensure irq/preemption can't change debugctl in between. | ||
166 | * Note also that both TIF_BLOCKSTEP and debugctl should | ||
167 | * be changed atomically wrt preemption. | ||
168 | * FIXME: this means that set/clear TIF_BLOCKSTEP is simply | ||
169 | * wrong if task != current: SIGKILL can wake up the stopped | ||
170 | * tracee, set/clear can then race with the running task, and | ||
171 | * that can confuse the next __switch_to_xtra(). | ||
172 | */ | ||
173 | local_irq_disable(); | ||
174 | debugctl = get_debugctlmsr(); | ||
175 | if (on) { | ||
176 | debugctl |= DEBUGCTLMSR_BTF; | ||
177 | set_tsk_thread_flag(task, TIF_BLOCKSTEP); | ||
178 | } else { | ||
179 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
180 | clear_tsk_thread_flag(task, TIF_BLOCKSTEP); | ||
181 | } | ||
182 | if (task == current) | ||
183 | update_debugctlmsr(debugctl); | ||
184 | local_irq_enable(); | ||
185 | } | ||
186 | |||
160 | /* | 187 | /* |
161 | * Enable single or block step. | 188 | * Enable single or block step. |
162 | */ | 189 | */ |
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) | |||
169 | * So no one should try to use debugger block stepping in a program | 196 | * So no one should try to use debugger block stepping in a program |
170 | * that uses user-mode single stepping itself. | 197 | * that uses user-mode single stepping itself. |
171 | */ | 198 | */ |
172 | if (enable_single_step(child) && block) { | 199 | if (enable_single_step(child) && block) |
173 | unsigned long debugctl = get_debugctlmsr(); | 200 | set_task_blockstep(child, true); |
174 | 201 | else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) | |
175 | debugctl |= DEBUGCTLMSR_BTF; | 202 | set_task_blockstep(child, false); |
176 | update_debugctlmsr(debugctl); | ||
177 | set_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
178 | } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { | ||
179 | unsigned long debugctl = get_debugctlmsr(); | ||
180 | |||
181 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
182 | update_debugctlmsr(debugctl); | ||
183 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
184 | } | ||
185 | } | 203 | } |
186 | 204 | ||
187 | void user_enable_single_step(struct task_struct *child) | 205 | void user_enable_single_step(struct task_struct *child) |
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) | |||
199 | /* | 217 | /* |
200 | * Make sure block stepping (BTF) is disabled. | 218 | * Make sure block stepping (BTF) is disabled. |
201 | */ | 219 | */ |
202 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { | 220 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) |
203 | unsigned long debugctl = get_debugctlmsr(); | 221 | set_task_blockstep(child, false); |
204 | |||
205 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
206 | update_debugctlmsr(debugctl); | ||
207 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
208 | } | ||
209 | 222 | ||
210 | /* Always clear TIF_SINGLESTEP... */ | 223 | /* Always clear TIF_SINGLESTEP... */ |
211 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 224 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |
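All DEBUGCTLMSR.BTF handling in step.c is now funneled through set_task_blockstep(), which disables interrupts so the TIF_BLOCKSTEP flag and the MSR cannot go out of sync under preemption. From userspace this machinery is driven by the PTRACE_SINGLEBLOCK request; a hedged sketch of a tracer stepping a child one basic block at a time (x86-64, minimal error handling):

#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>

#ifndef PTRACE_SINGLEBLOCK
#define PTRACE_SINGLEBLOCK 33          /* x86 value from asm/ptrace-abi.h */
#endif

int main(void)
{
        int status;
        pid_t pid = fork();

        if (pid == 0) {                 /* child: stop at execve, then run */
                ptrace(PTRACE_TRACEME, 0, 0, 0);
                execl("/bin/true", "true", (char *)NULL);
                _exit(1);
        }
        waitpid(pid, &status, 0);       /* child is stopped at execve */
        for (int i = 0; i < 5; i++) {
                /* resumes the child with TF + DEBUGCTLMSR.BTF set */
                if (ptrace(PTRACE_SINGLEBLOCK, pid, 0, 0) == -1)
                        break;
                if (waitpid(pid, &status, 0) == -1 || WIFEXITED(status))
                        break;
                long ip = ptrace(PTRACE_PEEKUSER, pid,
                                 offsetof(struct user_regs_struct, rip), 0);
                printf("block boundary at %#lx\n", ip);
        }
        ptrace(PTRACE_KILL, pid, 0, 0);
        return 0;
}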
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c deleted file mode 100644 index 0b0cb5fede19..000000000000 --- a/arch/x86/kernel/sys_i386_32.c +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | /* | ||
2 | * This file contains various random system calls that | ||
3 | * have a non-standard calling sequence on the Linux/i386 | ||
4 | * platform. | ||
5 | */ | ||
6 | |||
7 | #include <linux/errno.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/smp.h> | ||
12 | #include <linux/sem.h> | ||
13 | #include <linux/msg.h> | ||
14 | #include <linux/shm.h> | ||
15 | #include <linux/stat.h> | ||
16 | #include <linux/syscalls.h> | ||
17 | #include <linux/mman.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/utsname.h> | ||
20 | #include <linux/ipc.h> | ||
21 | |||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/unistd.h> | ||
24 | |||
25 | #include <asm/syscalls.h> | ||
26 | |||
27 | /* | ||
28 | * Do a system call from kernel instead of calling sys_execve so we | ||
29 | * end up with proper pt_regs. | ||
30 | */ | ||
31 | int kernel_execve(const char *filename, | ||
32 | const char *const argv[], | ||
33 | const char *const envp[]) | ||
34 | { | ||
35 | long __res; | ||
36 | asm volatile ("int $0x80" | ||
37 | : "=a" (__res) | ||
38 | : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); | ||
39 | return __res; | ||
40 | } | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c9369..8276dc6794cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
56 | #include <asm/fpu-internal.h> | 56 | #include <asm/fpu-internal.h> |
57 | #include <asm/mce.h> | 57 | #include <asm/mce.h> |
58 | #include <asm/rcu.h> | ||
58 | 59 | ||
59 | #include <asm/mach_traps.h> | 60 | #include <asm/mach_traps.h> |
60 | 61 | ||
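The new <asm/rcu.h> include, together with the exception_enter()/exception_exit() calls threaded through every handler below, ties the trap paths into the RCU user-mode extended quiescent state work (CONFIG_RCU_USER_QS) merged this cycle: an exception taken while RCU believes the CPU is executing in userspace must announce the kernel entry, and must announce the return to user mode on the way out. A sketch of the pairing, close to but not guaranteed to match the header verbatim:

static inline void exception_enter(struct pt_regs *regs)
{
        /* leave the RCU user extended quiescent state */
        rcu_user_exit();
}

static inline void exception_exit(struct pt_regs *regs)
{
        /* re-enter it only if the exception really came from user mode */
        if (user_mode(regs))
                rcu_user_enter();
}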
@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
107 | dec_preempt_count(); | 108 | dec_preempt_count(); |
108 | } | 109 | } |
109 | 110 | ||
110 | static void __kprobes | 111 | static int __kprobes |
111 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | 112 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, |
112 | long error_code, siginfo_t *info) | 113 | struct pt_regs *regs, long error_code) |
113 | { | 114 | { |
114 | struct task_struct *tsk = current; | ||
115 | |||
116 | #ifdef CONFIG_X86_32 | 115 | #ifdef CONFIG_X86_32 |
117 | if (regs->flags & X86_VM_MASK) { | 116 | if (regs->flags & X86_VM_MASK) { |
118 | /* | 117 | /* |
119 | * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | 118 | * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. |
120 | * On nmi (interrupt 2), do_trap should not be called. | 119 | * On nmi (interrupt 2), do_trap should not be called. |
121 | */ | 120 | */ |
122 | if (trapnr < X86_TRAP_UD) | 121 | if (trapnr < X86_TRAP_UD) { |
123 | goto vm86_trap; | 122 | if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, |
124 | goto trap_signal; | 123 | error_code, trapnr)) |
124 | return 0; | ||
125 | } | ||
126 | return -1; | ||
125 | } | 127 | } |
126 | #endif | 128 | #endif |
129 | if (!user_mode(regs)) { | ||
130 | if (!fixup_exception(regs)) { | ||
131 | tsk->thread.error_code = error_code; | ||
132 | tsk->thread.trap_nr = trapnr; | ||
133 | die(str, regs, error_code); | ||
134 | } | ||
135 | return 0; | ||
136 | } | ||
127 | 137 | ||
128 | if (!user_mode(regs)) | 138 | return -1; |
129 | goto kernel_trap; | 139 | } |
130 | 140 | ||
131 | #ifdef CONFIG_X86_32 | 141 | static void __kprobes |
132 | trap_signal: | 142 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
133 | #endif | 143 | long error_code, siginfo_t *info) |
144 | { | ||
145 | struct task_struct *tsk = current; | ||
146 | |||
147 | |||
148 | if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) | ||
149 | return; | ||
134 | /* | 150 | /* |
135 | * We want error_code and trap_nr set for userspace faults and | 151 | * We want error_code and trap_nr set for userspace faults and |
136 | * kernelspace faults which result in die(), but not | 152 | * kernelspace faults which result in die(), but not |
@@ -158,33 +174,20 @@ trap_signal: | |||
158 | force_sig_info(signr, info, tsk); | 174 | force_sig_info(signr, info, tsk); |
159 | else | 175 | else |
160 | force_sig(signr, tsk); | 176 | force_sig(signr, tsk); |
161 | return; | ||
162 | |||
163 | kernel_trap: | ||
164 | if (!fixup_exception(regs)) { | ||
165 | tsk->thread.error_code = error_code; | ||
166 | tsk->thread.trap_nr = trapnr; | ||
167 | die(str, regs, error_code); | ||
168 | } | ||
169 | return; | ||
170 | |||
171 | #ifdef CONFIG_X86_32 | ||
172 | vm86_trap: | ||
173 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, | ||
174 | error_code, trapnr)) | ||
175 | goto trap_signal; | ||
176 | return; | ||
177 | #endif | ||
178 | } | 177 | } |
179 | 178 | ||
180 | #define DO_ERROR(trapnr, signr, str, name) \ | 179 | #define DO_ERROR(trapnr, signr, str, name) \ |
181 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | 180 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ |
182 | { \ | 181 | { \ |
183 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 182 | exception_enter(regs); \ |
184 | == NOTIFY_STOP) \ | 183 | if (notify_die(DIE_TRAP, str, regs, error_code, \ |
184 | trapnr, signr) == NOTIFY_STOP) { \ | ||
185 | exception_exit(regs); \ | ||
185 | return; \ | 186 | return; \ |
187 | } \ | ||
186 | conditional_sti(regs); \ | 188 | conditional_sti(regs); \ |
187 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | 189 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ |
190 | exception_exit(regs); \ | ||
188 | } | 191 | } |
189 | 192 | ||
190 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 193 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | |||
195 | info.si_errno = 0; \ | 198 | info.si_errno = 0; \ |
196 | info.si_code = sicode; \ | 199 | info.si_code = sicode; \ |
197 | info.si_addr = (void __user *)siaddr; \ | 200 | info.si_addr = (void __user *)siaddr; \ |
198 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 201 | exception_enter(regs); \ |
199 | == NOTIFY_STOP) \ | 202 | if (notify_die(DIE_TRAP, str, regs, error_code, \ |
203 | trapnr, signr) == NOTIFY_STOP) { \ | ||
204 | exception_exit(regs); \ | ||
200 | return; \ | 205 | return; \ |
206 | } \ | ||
201 | conditional_sti(regs); \ | 207 | conditional_sti(regs); \ |
202 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | 208 | do_trap(trapnr, signr, str, regs, error_code, &info); \ |
209 | exception_exit(regs); \ | ||
203 | } | 210 | } |
204 | 211 | ||
205 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, | 212 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, |
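Because the new control flow hides inside the macro, it is worth writing out what DO_ERROR now expands to for a representative user (e.g. DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow); the expansion below is done by hand from the definition above):

dotraplinkage void do_overflow(struct pt_regs *regs, long error_code)
{
        exception_enter(regs);
        if (notify_die(DIE_TRAP, "overflow", regs, error_code,
                       X86_TRAP_OF, SIGSEGV) == NOTIFY_STOP) {
                exception_exit(regs);   /* balance the enter on early return */
                return;
        }
        conditional_sti(regs);
        do_trap(X86_TRAP_OF, SIGSEGV, "overflow", regs, error_code, NULL);
        exception_exit(regs);
}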
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, | |||
222 | /* Runs on IST stack */ | 229 | /* Runs on IST stack */ |
223 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | 230 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) |
224 | { | 231 | { |
232 | exception_enter(regs); | ||
225 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | 233 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, |
226 | X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) | 234 | X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { |
227 | return; | 235 | preempt_conditional_sti(regs); |
228 | preempt_conditional_sti(regs); | 236 | do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); |
229 | do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); | 237 | preempt_conditional_cli(regs); |
230 | preempt_conditional_cli(regs); | 238 | } |
239 | exception_exit(regs); | ||
231 | } | 240 | } |
232 | 241 | ||
233 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | 242 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
235 | static const char str[] = "double fault"; | 244 | static const char str[] = "double fault"; |
236 | struct task_struct *tsk = current; | 245 | struct task_struct *tsk = current; |
237 | 246 | ||
247 | exception_enter(regs); | ||
238 | /* Return not checked because double fault cannot be ignored */ | 248 | /* Return not checked because double fault cannot be ignored */ |
239 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 249 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
240 | 250 | ||
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
255 | { | 265 | { |
256 | struct task_struct *tsk; | 266 | struct task_struct *tsk; |
257 | 267 | ||
268 | exception_enter(regs); | ||
258 | conditional_sti(regs); | 269 | conditional_sti(regs); |
259 | 270 | ||
260 | #ifdef CONFIG_X86_32 | 271 | #ifdef CONFIG_X86_32 |
261 | if (regs->flags & X86_VM_MASK) | 272 | if (regs->flags & X86_VM_MASK) { |
262 | goto gp_in_vm86; | 273 | local_irq_enable(); |
274 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
275 | goto exit; | ||
276 | } | ||
263 | #endif | 277 | #endif |
264 | 278 | ||
265 | tsk = current; | 279 | tsk = current; |
266 | if (!user_mode(regs)) | 280 | if (!user_mode(regs)) { |
267 | goto gp_in_kernel; | 281 | if (fixup_exception(regs)) |
282 | goto exit; | ||
283 | |||
284 | tsk->thread.error_code = error_code; | ||
285 | tsk->thread.trap_nr = X86_TRAP_GP; | ||
286 | if (notify_die(DIE_GPF, "general protection fault", regs, error_code, | ||
287 | X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) | ||
288 | die("general protection fault", regs, error_code); | ||
289 | goto exit; | ||
290 | } | ||
268 | 291 | ||
269 | tsk->thread.error_code = error_code; | 292 | tsk->thread.error_code = error_code; |
270 | tsk->thread.trap_nr = X86_TRAP_GP; | 293 | tsk->thread.trap_nr = X86_TRAP_GP; |
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
279 | } | 302 | } |
280 | 303 | ||
281 | force_sig(SIGSEGV, tsk); | 304 | force_sig(SIGSEGV, tsk); |
282 | return; | 305 | exit: |
283 | 306 | exception_exit(regs); | |
284 | #ifdef CONFIG_X86_32 | ||
285 | gp_in_vm86: | ||
286 | local_irq_enable(); | ||
287 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
288 | return; | ||
289 | #endif | ||
290 | |||
291 | gp_in_kernel: | ||
292 | if (fixup_exception(regs)) | ||
293 | return; | ||
294 | |||
295 | tsk->thread.error_code = error_code; | ||
296 | tsk->thread.trap_nr = X86_TRAP_GP; | ||
297 | if (notify_die(DIE_GPF, "general protection fault", regs, error_code, | ||
298 | X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) | ||
299 | return; | ||
300 | die("general protection fault", regs, error_code); | ||
301 | } | 307 | } |
302 | 308 | ||
303 | /* May run on IST stack. */ | 309 | /* May run on IST stack. */ |
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co | |||
312 | ftrace_int3_handler(regs)) | 318 | ftrace_int3_handler(regs)) |
313 | return; | 319 | return; |
314 | #endif | 320 | #endif |
321 | exception_enter(regs); | ||
315 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 322 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
316 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 323 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
317 | SIGTRAP) == NOTIFY_STOP) | 324 | SIGTRAP) == NOTIFY_STOP) |
318 | return; | 325 | goto exit; |
319 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | 326 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ |
320 | 327 | ||
321 | if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 328 | if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
322 | SIGTRAP) == NOTIFY_STOP) | 329 | SIGTRAP) == NOTIFY_STOP) |
323 | return; | 330 | goto exit; |
324 | 331 | ||
325 | /* | 332 | /* |
326 | * Let others (NMI) know that the debug stack is in use | 333 | * Let others (NMI) know that the debug stack is in use |
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co | |||
331 | do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); | 338 | do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); |
332 | preempt_conditional_cli(regs); | 339 | preempt_conditional_cli(regs); |
333 | debug_stack_usage_dec(); | 340 | debug_stack_usage_dec(); |
341 | exit: | ||
342 | exception_exit(regs); | ||
334 | } | 343 | } |
335 | 344 | ||
336 | #ifdef CONFIG_X86_64 | 345 | #ifdef CONFIG_X86_64 |
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
391 | unsigned long dr6; | 400 | unsigned long dr6; |
392 | int si_code; | 401 | int si_code; |
393 | 402 | ||
403 | exception_enter(regs); | ||
404 | |||
394 | get_debugreg(dr6, 6); | 405 | get_debugreg(dr6, 6); |
395 | 406 | ||
396 | /* Filter out all the reserved bits which are preset to 1 */ | 407 | /* Filter out all the reserved bits which are preset to 1 */ |
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
406 | 417 | ||
407 | /* Catch kmemcheck conditions first of all! */ | 418 | /* Catch kmemcheck conditions first of all! */ |
408 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) | 419 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
409 | return; | 420 | goto exit; |
410 | 421 | ||
411 | /* DR6 may or may not be cleared by the CPU */ | 422 | /* DR6 may or may not be cleared by the CPU */ |
412 | set_debugreg(0, 6); | 423 | set_debugreg(0, 6); |
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
421 | 432 | ||
422 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | 433 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, |
423 | SIGTRAP) == NOTIFY_STOP) | 434 | SIGTRAP) == NOTIFY_STOP) |
424 | return; | 435 | goto exit; |
425 | 436 | ||
426 | /* | 437 | /* |
427 | * Let others (NMI) know that the debug stack is in use | 438 | * Let others (NMI) know that the debug stack is in use |
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
437 | X86_TRAP_DB); | 448 | X86_TRAP_DB); |
438 | preempt_conditional_cli(regs); | 449 | preempt_conditional_cli(regs); |
439 | debug_stack_usage_dec(); | 450 | debug_stack_usage_dec(); |
440 | return; | 451 | goto exit; |
441 | } | 452 | } |
442 | 453 | ||
443 | /* | 454 | /* |
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
458 | preempt_conditional_cli(regs); | 469 | preempt_conditional_cli(regs); |
459 | debug_stack_usage_dec(); | 470 | debug_stack_usage_dec(); |
460 | 471 | ||
461 | return; | 472 | exit: |
473 | exception_exit(regs); | ||
462 | } | 474 | } |
463 | 475 | ||
464 | /* | 476 | /* |
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | |||
555 | #ifdef CONFIG_X86_32 | 567 | #ifdef CONFIG_X86_32 |
556 | ignore_fpu_irq = 1; | 568 | ignore_fpu_irq = 1; |
557 | #endif | 569 | #endif |
558 | 570 | exception_enter(regs); | |
559 | math_error(regs, error_code, X86_TRAP_MF); | 571 | math_error(regs, error_code, X86_TRAP_MF); |
572 | exception_exit(regs); | ||
560 | } | 573 | } |
561 | 574 | ||
562 | dotraplinkage void | 575 | dotraplinkage void |
563 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 576 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
564 | { | 577 | { |
578 | exception_enter(regs); | ||
565 | math_error(regs, error_code, X86_TRAP_XF); | 579 | math_error(regs, error_code, X86_TRAP_XF); |
580 | exception_exit(regs); | ||
566 | } | 581 | } |
567 | 582 | ||
568 | dotraplinkage void | 583 | dotraplinkage void |
@@ -613,11 +628,12 @@ void math_state_restore(void) | |||
613 | } | 628 | } |
614 | 629 | ||
615 | __thread_fpu_begin(tsk); | 630 | __thread_fpu_begin(tsk); |
631 | |||
616 | /* | 632 | /* |
617 | * Paranoid restore: send a SIGSEGV if we fail to restore the state. | 633 | * Paranoid restore: send a SIGSEGV if we fail to restore the state. |
618 | */ | 634 | */ |
619 | if (unlikely(restore_fpu_checking(tsk))) { | 635 | if (unlikely(restore_fpu_checking(tsk))) { |
620 | __thread_fpu_end(tsk); | 636 | drop_init_fpu(tsk); |
621 | force_sig(SIGSEGV, tsk); | 637 | force_sig(SIGSEGV, tsk); |
622 | return; | 638 | return; |
623 | } | 639 | } |
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); | |||
629 | dotraplinkage void __kprobes | 645 | dotraplinkage void __kprobes |
630 | do_device_not_available(struct pt_regs *regs, long error_code) | 646 | do_device_not_available(struct pt_regs *regs, long error_code) |
631 | { | 647 | { |
648 | exception_enter(regs); | ||
649 | BUG_ON(use_eager_fpu()); | ||
650 | |||
632 | #ifdef CONFIG_MATH_EMULATION | 651 | #ifdef CONFIG_MATH_EMULATION |
633 | if (read_cr0() & X86_CR0_EM) { | 652 | if (read_cr0() & X86_CR0_EM) { |
634 | struct math_emu_info info = { }; | 653 | struct math_emu_info info = { }; |
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
637 | 656 | ||
638 | info.regs = regs; | 657 | info.regs = regs; |
639 | math_emulate(&info); | 658 | math_emulate(&info); |
659 | exception_exit(regs); | ||
640 | return; | 660 | return; |
641 | } | 661 | } |
642 | #endif | 662 | #endif |
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
644 | #ifdef CONFIG_X86_32 | 664 | #ifdef CONFIG_X86_32 |
645 | conditional_sti(regs); | 665 | conditional_sti(regs); |
646 | #endif | 666 | #endif |
667 | exception_exit(regs); | ||
647 | } | 668 | } |
648 | 669 | ||
649 | #ifdef CONFIG_X86_32 | 670 | #ifdef CONFIG_X86_32 |
650 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | 671 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) |
651 | { | 672 | { |
652 | siginfo_t info; | 673 | siginfo_t info; |
674 | |||
675 | exception_enter(regs); | ||
653 | local_irq_enable(); | 676 | local_irq_enable(); |
654 | 677 | ||
655 | info.si_signo = SIGILL; | 678 | info.si_signo = SIGILL; |
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
657 | info.si_code = ILL_BADSTK; | 680 | info.si_code = ILL_BADSTK; |
658 | info.si_addr = NULL; | 681 | info.si_addr = NULL; |
659 | if (notify_die(DIE_TRAP, "iret exception", regs, error_code, | 682 | if (notify_die(DIE_TRAP, "iret exception", regs, error_code, |
660 | X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) | 683 | X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { |
661 | return; | 684 | do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, |
662 | do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, | 685 | &info); |
663 | &info); | 686 | } |
687 | exception_exit(regs); | ||
664 | } | 688 | } |
665 | #endif | 689 | #endif |
666 | 690 | ||
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa7..9538f00827a9 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -41,6 +41,9 @@ | |||
41 | /* Adjust the return address of a call insn */ | 41 | /* Adjust the return address of a call insn */ |
42 | #define UPROBE_FIX_CALL 0x2 | 42 | #define UPROBE_FIX_CALL 0x2 |
43 | 43 | ||
44 | /* Instruction will modify TF, don't change it */ | ||
45 | #define UPROBE_FIX_SETF 0x4 | ||
46 | |||
44 | #define UPROBE_FIX_RIP_AX 0x8000 | 47 | #define UPROBE_FIX_RIP_AX 0x8000 |
45 | #define UPROBE_FIX_RIP_CX 0x4000 | 48 | #define UPROBE_FIX_RIP_CX 0x4000 |
46 | 49 | ||
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | |||
239 | insn_get_opcode(insn); /* should be a nop */ | 242 | insn_get_opcode(insn); /* should be a nop */ |
240 | 243 | ||
241 | switch (OPCODE1(insn)) { | 244 | switch (OPCODE1(insn)) { |
245 | case 0x9d: | ||
246 | /* popf */ | ||
247 | auprobe->fixups |= UPROBE_FIX_SETF; | ||
248 | break; | ||
242 | case 0xc3: /* ret/lret */ | 249 | case 0xc3: /* ret/lret */ |
243 | case 0xcb: | 250 | case 0xcb: |
244 | case 0xc2: | 251 | case 0xc2: |
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
646 | * Skip these instructions as per the currently known x86 ISA. | 653 | * Skip these instructions as per the currently known x86 ISA. |
647 | * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } | 654 | * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } |
648 | */ | 655 | */ |
649 | bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | 656 | static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) |
650 | { | 657 | { |
651 | int i; | 658 | int i; |
652 | 659 | ||
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
673 | } | 680 | } |
674 | return false; | 681 | return false; |
675 | } | 682 | } |
683 | |||
684 | bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
685 | { | ||
686 | bool ret = __skip_sstep(auprobe, regs); | ||
687 | if (ret && (regs->flags & X86_EFLAGS_TF)) | ||
688 | send_sig(SIGTRAP, current, 0); | ||
689 | return ret; | ||
690 | } | ||
691 | |||
692 | void arch_uprobe_enable_step(struct arch_uprobe *auprobe) | ||
693 | { | ||
694 | struct task_struct *task = current; | ||
695 | struct arch_uprobe_task *autask = &task->utask->autask; | ||
696 | struct pt_regs *regs = task_pt_regs(task); | ||
697 | |||
698 | autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); | ||
699 | |||
700 | regs->flags |= X86_EFLAGS_TF; | ||
701 | if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) | ||
702 | set_task_blockstep(task, false); | ||
703 | } | ||
704 | |||
705 | void arch_uprobe_disable_step(struct arch_uprobe *auprobe) | ||
706 | { | ||
707 | struct task_struct *task = current; | ||
708 | struct arch_uprobe_task *autask = &task->utask->autask; | ||
709 | bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); | ||
710 | struct pt_regs *regs = task_pt_regs(task); | ||
711 | /* | ||
712 | * The state of TIF_BLOCKSTEP was not saved so we can get an extra | ||
713 | * SIGTRAP if we do not clear TF. We need to examine the opcode to | ||
714 | * make it right. | ||
715 | */ | ||
716 | if (unlikely(trapped)) { | ||
717 | if (!autask->saved_tf) | ||
718 | regs->flags &= ~X86_EFLAGS_TF; | ||
719 | } else { | ||
720 | if (autask->saved_tf) | ||
721 | send_sig(SIGTRAP, task, 0); | ||
722 | else if (!(auprobe->fixups & UPROBE_FIX_SETF)) | ||
723 | regs->flags &= ~X86_EFLAGS_TF; | ||
724 | } | ||
725 | } | ||
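The interplay of saved_tf, UTASK_SSTEP_TRAPPED and UPROBE_FIX_SETF in arch_uprobe_disable_step() is dense; tabulated directly from the code above:

/*
 *  saved_tf  trapped  FIX_SETF   TF on return to the task
 *  --------  -------  --------   -----------------------------------------
 *     0         0        0       cleared (uprobes set it, the task didn't)
 *     0         0        1       left alone: the popf we just stepped over
 *                                already wrote the task's own TF value
 *     0         1        -       cleared
 *     1         0        -       kept, plus an explicit SIGTRAP so the
 *                                task-visible single-step still fires
 *     1         1        -       kept; the trap was already delivered
 */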
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 54abcc0baf23..5c9687b1bde6 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) | |||
561 | if ((trapno == 3) || (trapno == 1)) { | 561 | if ((trapno == 3) || (trapno == 1)) { |
562 | KVM86->regs32->ax = VM86_TRAP + (trapno << 8); | 562 | KVM86->regs32->ax = VM86_TRAP + (trapno << 8); |
563 | /* setting this flag forces the code in entry_32.S to | 563 | /* setting this flag forces the code in entry_32.S to |
564 | call save_v86_state() and change the stack pointer | 564 | the path where we call save_v86_state() and change |
565 | to KVM86->regs32 */ | 565 | the stack pointer to KVM86->regs32 */ |
566 | set_thread_flag(TIF_IRET); | 566 | set_thread_flag(TIF_NOTIFY_RESUME); |
567 | return 0; | 567 | return 0; |
568 | } | 568 | } |
569 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); | 569 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8d141b309046..3a3e8c9e280d 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/jiffies.h> | 28 | #include <linux/jiffies.h> |
29 | #include <linux/sysctl.h> | 29 | #include <linux/sysctl.h> |
30 | #include <linux/topology.h> | 30 | #include <linux/topology.h> |
31 | #include <linux/clocksource.h> | 31 | #include <linux/timekeeper_internal.h> |
32 | #include <linux/getcpu.h> | 32 | #include <linux/getcpu.h> |
33 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void) | |||
82 | vsyscall_gtod_data.sys_tz = sys_tz; | 82 | vsyscall_gtod_data.sys_tz = sys_tz; |
83 | } | 83 | } |
84 | 84 | ||
85 | void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | 85 | void update_vsyscall(struct timekeeper *tk) |
86 | struct clocksource *clock, u32 mult) | ||
87 | { | 86 | { |
88 | struct timespec monotonic; | 87 | struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; |
89 | 88 | ||
90 | write_seqcount_begin(&vsyscall_gtod_data.seq); | 89 | write_seqcount_begin(&vdata->seq); |
91 | 90 | ||
92 | /* copy vsyscall data */ | 91 | /* copy vsyscall data */ |
93 | vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; | 92 | vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; |
94 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 93 | vdata->clock.cycle_last = tk->clock->cycle_last; |
95 | vsyscall_gtod_data.clock.mask = clock->mask; | 94 | vdata->clock.mask = tk->clock->mask; |
96 | vsyscall_gtod_data.clock.mult = mult; | 95 | vdata->clock.mult = tk->mult; |
97 | vsyscall_gtod_data.clock.shift = clock->shift; | 96 | vdata->clock.shift = tk->shift; |
98 | 97 | ||
99 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 98 | vdata->wall_time_sec = tk->xtime_sec; |
100 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 99 | vdata->wall_time_snsec = tk->xtime_nsec; |
100 | |||
101 | vdata->monotonic_time_sec = tk->xtime_sec | ||
102 | + tk->wall_to_monotonic.tv_sec; | ||
103 | vdata->monotonic_time_snsec = tk->xtime_nsec | ||
104 | + (tk->wall_to_monotonic.tv_nsec | ||
105 | << tk->shift); | ||
106 | while (vdata->monotonic_time_snsec >= | ||
107 | (((u64)NSEC_PER_SEC) << tk->shift)) { | ||
108 | vdata->monotonic_time_snsec -= | ||
109 | ((u64)NSEC_PER_SEC) << tk->shift; | ||
110 | vdata->monotonic_time_sec++; | ||
111 | } | ||
101 | 112 | ||
102 | monotonic = timespec_add(*wall_time, *wtm); | 113 | vdata->wall_time_coarse.tv_sec = tk->xtime_sec; |
103 | vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; | 114 | vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); |
104 | vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; | ||
105 | 115 | ||
106 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | 116 | vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, |
107 | vsyscall_gtod_data.monotonic_time_coarse = | 117 | tk->wall_to_monotonic); |
108 | timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); | ||
109 | 118 | ||
110 | write_seqcount_end(&vsyscall_gtod_data.seq); | 119 | write_seqcount_end(&vdata->seq); |
111 | } | 120 | } |
112 | 121 | ||
113 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | 122 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
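The conversion to the timekeeper keeps monotonic time as a (seconds, shifted-nanoseconds) pair: tk->xtime_nsec is already stored left-shifted by tk->shift, so the update path above only adds and renormalizes in that fixed-point form and readers pay a single shift at the end. A hedged sketch of the reader side (the function is illustrative, not the actual vDSO code; the fields are the ones updated above):

static u64 read_monotonic_ns(const struct vsyscall_gtod_data *vd, u64 cycles)
{
        /* the cycle delta times mult is also a (ns << shift) quantity */
        u64 snsec = vd->monotonic_time_snsec +
                    (cycles - vd->clock.cycle_last) * vd->clock.mult;

        return (u64)vd->monotonic_time_sec * NSEC_PER_SEC +
               (snsec >> vd->clock.shift);
}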
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927c..1330dd102950 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -13,9 +13,13 @@ | |||
13 | #include <asm/ftrace.h> | 13 | #include <asm/ftrace.h> |
14 | 14 | ||
15 | #ifdef CONFIG_FUNCTION_TRACER | 15 | #ifdef CONFIG_FUNCTION_TRACER |
16 | /* mcount is defined in assembly */ | 16 | /* mcount and __fentry__ are defined in assembly */ |
17 | #ifdef CC_USING_FENTRY | ||
18 | EXPORT_SYMBOL(__fentry__); | ||
19 | #else | ||
17 | EXPORT_SYMBOL(mcount); | 20 | EXPORT_SYMBOL(mcount); |
18 | #endif | 21 | #endif |
22 | #endif | ||
19 | 23 | ||
20 | EXPORT_SYMBOL(__get_user_1); | 24 | EXPORT_SYMBOL(__get_user_1); |
21 | EXPORT_SYMBOL(__get_user_2); | 25 | EXPORT_SYMBOL(__get_user_2); |
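CC_USING_FENTRY is set when the kernel is built with gcc's -mfentry: the profiling call then lands before the prologue, as the very first instruction of each function, and targets __fentry__ instead of mcount, so modules need that symbol exported. Roughly, as a sketch of the compiler output rather than code from this tree:

/*
 *  -pg alone:                 -pg -mfentry:
 *      push %rbp                  call __fentry__
 *      mov  %rsp,%rbp             push %rbp
 *      call mcount                mov  %rsp,%rbp
 *      ...                        ...
 */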
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e20709119..ada87a329edc 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -10,9 +10,7 @@ | |||
10 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
11 | #include <asm/i387.h> | 11 | #include <asm/i387.h> |
12 | #include <asm/fpu-internal.h> | 12 | #include <asm/fpu-internal.h> |
13 | #ifdef CONFIG_IA32_EMULATION | 13 | #include <asm/sigframe.h> |
14 | #include <asm/sigcontext32.h> | ||
15 | #endif | ||
16 | #include <asm/xcr.h> | 14 | #include <asm/xcr.h> |
17 | 15 | ||
18 | /* | 16 | /* |
@@ -23,13 +21,9 @@ u64 pcntxt_mask; | |||
23 | /* | 21 | /* |
24 | * Represents init state for the supported extended state. | 22 | * Represents init state for the supported extended state. |
25 | */ | 23 | */ |
26 | static struct xsave_struct *init_xstate_buf; | 24 | struct xsave_struct *init_xstate_buf; |
27 | |||
28 | struct _fpx_sw_bytes fx_sw_reserved; | ||
29 | #ifdef CONFIG_IA32_EMULATION | ||
30 | struct _fpx_sw_bytes fx_sw_reserved_ia32; | ||
31 | #endif | ||
32 | 25 | ||
26 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | ||
33 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | 27 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; |
34 | 28 | ||
35 | /* | 29 | /* |
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | |||
44 | */ | 38 | */ |
45 | void __sanitize_i387_state(struct task_struct *tsk) | 39 | void __sanitize_i387_state(struct task_struct *tsk) |
46 | { | 40 | { |
47 | u64 xstate_bv; | ||
48 | int feature_bit = 0x2; | ||
49 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | 41 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
42 | int feature_bit = 0x2; | ||
43 | u64 xstate_bv; | ||
50 | 44 | ||
51 | if (!fx) | 45 | if (!fx) |
52 | return; | 46 | return; |
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
104 | * Check for the presence of extended state information in the | 98 | * Check for the presence of extended state information in the |
105 | * user fpstate pointer in the sigcontext. | 99 | * user fpstate pointer in the sigcontext. |
106 | */ | 100 | */ |
107 | int check_for_xstate(struct i387_fxsave_struct __user *buf, | 101 | static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, |
108 | void __user *fpstate, | 102 | void __user *fpstate, |
109 | struct _fpx_sw_bytes *fx_sw_user) | 103 | struct _fpx_sw_bytes *fx_sw) |
110 | { | 104 | { |
111 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + | 105 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + |
112 | sizeof(struct xsave_hdr_struct); | 106 | sizeof(struct xsave_hdr_struct); |
113 | unsigned int magic2; | 107 | unsigned int magic2; |
114 | int err; | ||
115 | 108 | ||
116 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], | 109 | if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) |
117 | sizeof(struct _fpx_sw_bytes)); | 110 | return -1; |
118 | if (err) | ||
119 | return -EFAULT; | ||
120 | 111 | ||
121 | /* | 112 | /* Check for the first magic field and other error scenarios. */ |
122 | * First Magic check failed. | 113 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || |
123 | */ | 114 | fx_sw->xstate_size < min_xstate_size || |
124 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) | 115 | fx_sw->xstate_size > xstate_size || |
125 | return -EINVAL; | 116 | fx_sw->xstate_size > fx_sw->extended_size) |
117 | return -1; | ||
126 | 118 | ||
127 | /* | 119 | /* |
128 | * Check for error scenarios. | ||
129 | */ | ||
130 | if (fx_sw_user->xstate_size < min_xstate_size || | ||
131 | fx_sw_user->xstate_size > xstate_size || | ||
132 | fx_sw_user->xstate_size > fx_sw_user->extended_size) | ||
133 | return -EINVAL; | ||
134 | |||
135 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + | ||
136 | fx_sw_user->extended_size - | ||
137 | FP_XSTATE_MAGIC2_SIZE)); | ||
138 | if (err) | ||
139 | return err; | ||
140 | /* | ||
141 | * Check for the presence of second magic word at the end of memory | 120 | * Check for the presence of second magic word at the end of memory |
142 | * layout. This detects the case where the user just copied the legacy | 121 | * layout. This detects the case where the user just copied the legacy |
143 | * fpstate layout without copying the extended state information | 122 | * fpstate layout without copying the extended state information |
144 | * in the memory layout. | 123 | * in the memory layout. |
145 | */ | 124 | */ |
146 | if (magic2 != FP_XSTATE_MAGIC2) | 125 | if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) |
147 | return -EFAULT; | 126 | || magic2 != FP_XSTATE_MAGIC2) |
127 | return -1; | ||
148 | 128 | ||
149 | return 0; | 129 | return 0; |
150 | } | 130 | } |
151 | 131 | ||
152 | #ifdef CONFIG_X86_64 | ||
153 | /* | 132 | /* |
154 | * Signal frame handlers. | 133 | * Signal frame handlers. |
155 | */ | 134 | */ |
156 | 135 | static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) | |
157 | int save_i387_xstate(void __user *buf) | ||
158 | { | 136 | { |
159 | struct task_struct *tsk = current; | 137 | if (use_fxsr()) { |
160 | int err = 0; | 138 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
161 | 139 | struct user_i387_ia32_struct env; | |
162 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) | 140 | struct _fpstate_ia32 __user *fp = buf; |
163 | return -EACCES; | ||
164 | 141 | ||
165 | BUG_ON(sig_xstate_size < xstate_size); | 142 | convert_from_fxsr(&env, tsk); |
166 | 143 | ||
167 | if ((unsigned long)buf % 64) | 144 | if (__copy_to_user(buf, &env, sizeof(env)) || |
168 | pr_err("%s: bad fpstate %p\n", __func__, buf); | 145 | __put_user(xsave->i387.swd, &fp->status) || |
169 | 146 | __put_user(X86_FXSR_MAGIC, &fp->magic)) | |
170 | if (!used_math()) | 147 | return -1; |
171 | return 0; | ||
172 | |||
173 | if (user_has_fpu()) { | ||
174 | if (use_xsave()) | ||
175 | err = xsave_user(buf); | ||
176 | else | ||
177 | err = fxsave_user(buf); | ||
178 | |||
179 | if (err) | ||
180 | return err; | ||
181 | user_fpu_end(); | ||
182 | } else { | 148 | } else { |
183 | sanitize_i387_state(tsk); | 149 | struct i387_fsave_struct __user *fp = buf; |
184 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, | 150 | u32 swd; |
185 | xstate_size)) | 151 | if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) |
186 | return -1; | 152 | return -1; |
187 | } | 153 | } |
188 | 154 | ||
189 | clear_used_math(); /* trigger finit */ | 155 | return 0; |
156 | } | ||
190 | 157 | ||
191 | if (use_xsave()) { | 158 | static inline int save_xstate_epilog(void __user *buf, int ia32_frame) |
192 | struct _fpstate __user *fx = buf; | 159 | { |
193 | struct _xstate __user *x = buf; | 160 | struct xsave_struct __user *x = buf; |
194 | u64 xstate_bv; | 161 | struct _fpx_sw_bytes *sw_bytes; |
162 | u32 xstate_bv; | ||
163 | int err; | ||
195 | 164 | ||
196 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, | 165 | /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ |
197 | sizeof(struct _fpx_sw_bytes)); | 166 | sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; |
167 | err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); | ||
198 | 168 | ||
199 | err |= __put_user(FP_XSTATE_MAGIC2, | 169 | if (!use_xsave()) |
200 | (__u32 __user *) (buf + sig_xstate_size | 170 | return err; |
201 | - FP_XSTATE_MAGIC2_SIZE)); | ||
202 | 171 | ||
203 | /* | 172 | err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); |
204 | * Read the xstate_bv which we copied (directly from the cpu or | ||
205 | * from the state in task struct) to the user buffers and | ||
206 | * set the FP/SSE bits. | ||
207 | */ | ||
208 | err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); | ||
209 | 173 | ||
210 | /* | 174 | /* |
211 | * For legacy compatible, we always set FP/SSE bits in the bit | 175 | * Read the xstate_bv which we copied (directly from the cpu or |
212 | * vector while saving the state to the user context. This will | 176 | * from the state in task struct) to the user buffers. |
213 | * enable us capturing any changes(during sigreturn) to | 177 | */ |
214 | * the FP/SSE bits by the legacy applications which don't touch | 178 | err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); |
215 | * xstate_bv in the xsave header. | ||
216 | * | ||
217 | * xsave aware apps can change the xstate_bv in the xsave | ||
218 | * header as well as change any contents in the memory layout. | ||
219 | * xrestore as part of sigreturn will capture all the changes. | ||
220 | */ | ||
221 | xstate_bv |= XSTATE_FPSSE; | ||
222 | 179 | ||
223 | err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); | 180 | /* |
181 | * For legacy compatibility, we always set the FP/SSE bits in the | ||
182 | * bit vector while saving the state to the user context. This | ||
183 | * lets us capture any changes (during sigreturn) made to | ||
184 | * the FP/SSE bits by legacy applications which don't touch | ||
185 | * xstate_bv in the xsave header. | ||
186 | * | ||
187 | * xsave aware apps can change the xstate_bv in the xsave | ||
188 | * header as well as change any contents in the memory layout. | ||
189 | * xrestore as part of sigreturn will capture all the changes. | ||
190 | */ | ||
191 | xstate_bv |= XSTATE_FPSSE; | ||
224 | 192 | ||
225 | if (err) | 193 | err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); |
226 | return err; | ||
227 | } | ||
228 | 194 | ||
229 | return 1; | 195 | return err; |
196 | } | ||
197 | |||
198 | static inline int save_user_xstate(struct xsave_struct __user *buf) | ||
199 | { | ||
200 | int err; | ||
201 | |||
202 | if (use_xsave()) | ||
203 | err = xsave_user(buf); | ||
204 | else if (use_fxsr()) | ||
205 | err = fxsave_user((struct i387_fxsave_struct __user *) buf); | ||
206 | else | ||
207 | err = fsave_user((struct i387_fsave_struct __user *) buf); | ||
208 | |||
209 | if (unlikely(err) && __clear_user(buf, xstate_size)) | ||
210 | err = -EFAULT; | ||
211 | return err; | ||
230 | } | 212 | } |
231 | 213 | ||
232 | /* | 214 | /* |
233 | * Restore the extended state if present. Otherwise, restore the FP/SSE | 215 | * Save the fpu, extended register state to the user signal frame. |
234 | * state. | 216 | * |
217 | * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save | ||
218 | * state is copied. | ||
219 | * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. | ||
220 | * | ||
221 | * buf == buf_fx for 64-bit frames and 32-bit fsave frame. | ||
222 | * buf != buf_fx for 32-bit frames with fxstate. | ||
223 | * | ||
224 | * If the fpu, extended register state is live, save the state directly | ||
225 | * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, | ||
226 | * copy the thread's fpu state to the user frame starting at 'buf_fx'. | ||
227 | * | ||
228 | * If this is a 32-bit frame with fxstate, put a fsave header before | ||
229 | * the aligned state at 'buf_fx'. | ||
230 | * | ||
231 | * For [f]xsave state, update the SW reserved fields in the [f]xsave frame | ||
232 | * indicating the absence/presence of the extended state to the user. | ||
235 | */ | 233 | */ |
236 | static int restore_user_xstate(void __user *buf) | 234 | int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) |
237 | { | 235 | { |
238 | struct _fpx_sw_bytes fx_sw_user; | 236 | struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; |
239 | u64 mask; | 237 | struct task_struct *tsk = current; |
240 | int err; | 238 | int ia32_fxstate = (buf != buf_fx); |
241 | 239 | ||
242 | if (((unsigned long)buf % 64) || | 240 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || |
243 | check_for_xstate(buf, buf, &fx_sw_user)) | 241 | config_enabled(CONFIG_IA32_EMULATION)); |
244 | goto fx_only; | ||
245 | 242 | ||
246 | mask = fx_sw_user.xstate_bv; | 243 | if (!access_ok(VERIFY_WRITE, buf, size)) |
244 | return -EACCES; | ||
247 | 245 | ||
248 | /* | 246 | if (!HAVE_HWFP) |
249 | * restore the state passed by the user. | 247 | return fpregs_soft_get(current, NULL, 0, |
250 | */ | 248 | sizeof(struct user_i387_ia32_struct), NULL, |
251 | err = xrestore_user(buf, mask); | 249 | (struct _fpstate_ia32 __user *) buf) ? -1 : 1; |
252 | if (err) | ||
253 | return err; | ||
254 | 250 | ||
255 | /* | 251 | if (user_has_fpu()) { |
256 | * init the state skipped by the user. | 252 | /* Save the live register state to the user directly. */ |
257 | */ | 253 | if (save_user_xstate(buf_fx)) |
258 | mask = pcntxt_mask & ~mask; | 254 | return -1; |
259 | if (unlikely(mask)) | 255 | /* Update the thread's fxstate to save the fsave header. */ |
260 | xrstor_state(init_xstate_buf, mask); | 256 | if (ia32_fxstate) |
257 | fpu_fxsave(&tsk->thread.fpu); | ||
258 | } else { | ||
259 | sanitize_i387_state(tsk); | ||
260 | if (__copy_to_user(buf_fx, xsave, xstate_size)) | ||
261 | return -1; | ||
262 | } | ||
263 | |||
264 | /* Save the fsave header for the 32-bit frames. */ | ||
265 | if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) | ||
266 | return -1; | ||
267 | |||
268 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) | ||
269 | return -1; | ||
270 | |||
271 | drop_init_fpu(tsk); /* trigger finit */ | ||
261 | 272 | ||
262 | return 0; | 273 | return 0; |
274 | } | ||
263 | 275 | ||
264 | fx_only: | 276 | static inline void |
265 | /* | 277 | sanitize_restored_xstate(struct task_struct *tsk, |
266 | * couldn't find the extended state information in the | 278 | struct user_i387_ia32_struct *ia32_env, |
267 | * memory layout. Restore just the FP/SSE and init all | 279 | u64 xstate_bv, int fx_only) |
268 | * the other extended state. | 280 | { |
269 | */ | 281 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
270 | xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); | 282 | struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; |
271 | return fxrstor_checking((__force struct i387_fxsave_struct *)buf); | 283 | |
284 | if (use_xsave()) { | ||
285 | /* These bits must be zero. */ | ||
286 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
287 | |||
288 | /* | ||
289 | * Init the state that is not present in the memory | ||
290 | * layout and not enabled by the OS. | ||
291 | */ | ||
292 | if (fx_only) | ||
293 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
294 | else | ||
295 | xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); | ||
296 | } | ||
297 | |||
298 | if (use_fxsr()) { | ||
299 | /* | ||
300 | * mxcsr reserved bits must be masked to zero for security | ||
301 | * reasons. | ||
302 | */ | ||
303 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
304 | |||
305 | convert_to_fxsr(tsk, ia32_env); | ||
306 | } | ||
272 | } | 307 | } |
273 | 308 | ||
274 | /* | 309 | /* |
275 | * This restores directly out of user space. Exceptions are handled. | 310 | * Restore the extended state if present. Otherwise, restore the FP/SSE state. |
276 | */ | 311 | */ |
277 | int restore_i387_xstate(void __user *buf) | 312 | static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) |
278 | { | 313 | { |
314 | if (use_xsave()) { | ||
315 | if ((unsigned long)buf % 64 || fx_only) { | ||
316 | u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; | ||
317 | xrstor_state(init_xstate_buf, init_bv); | ||
318 | return fxrstor_user(buf); | ||
319 | } else { | ||
320 | u64 init_bv = pcntxt_mask & ~xbv; | ||
321 | if (unlikely(init_bv)) | ||
322 | xrstor_state(init_xstate_buf, init_bv); | ||
323 | return xrestore_user(buf, xbv); | ||
324 | } | ||
325 | } else if (use_fxsr()) { | ||
326 | return fxrstor_user(buf); | ||
327 | } else | ||
328 | return frstor_user(buf); | ||
329 | } | ||
330 | |||
331 | int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | ||
332 | { | ||
333 | int ia32_fxstate = (buf != buf_fx); | ||
279 | struct task_struct *tsk = current; | 334 | struct task_struct *tsk = current; |
280 | int err = 0; | 335 | int state_size = xstate_size; |
336 | u64 xstate_bv = 0; | ||
337 | int fx_only = 0; | ||
338 | |||
339 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
340 | config_enabled(CONFIG_IA32_EMULATION)); | ||
281 | 341 | ||
282 | if (!buf) { | 342 | if (!buf) { |
283 | if (used_math()) | 343 | drop_init_fpu(tsk); |
284 | goto clear; | ||
285 | return 0; | 344 | return 0; |
286 | } else | 345 | } |
287 | if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) | ||
288 | return -EACCES; | ||
289 | 346 | ||
290 | if (!used_math()) { | 347 | if (!access_ok(VERIFY_READ, buf, size)) |
291 | err = init_fpu(tsk); | 348 | return -EACCES; |
292 | if (err) | 349 | |
293 | return err; | 350 | if (!used_math() && init_fpu(tsk)) |
351 | return -1; | ||
352 | |||
353 | if (!HAVE_HWFP) { | ||
354 | return fpregs_soft_set(current, NULL, | ||
355 | 0, sizeof(struct user_i387_ia32_struct), | ||
356 | NULL, buf) != 0; | ||
294 | } | 357 | } |
295 | 358 | ||
296 | user_fpu_begin(); | 359 | if (use_xsave()) { |
297 | if (use_xsave()) | 360 | struct _fpx_sw_bytes fx_sw_user; |
298 | err = restore_user_xstate(buf); | 361 | if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { |
299 | else | 362 | /* |
300 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 363 | * Couldn't find the extended state information in the |
301 | buf); | 364 | * memory layout. Restore just the FP/SSE and init all |
302 | if (unlikely(err)) { | 365 | * the other extended state. |
366 | */ | ||
367 | state_size = sizeof(struct i387_fxsave_struct); | ||
368 | fx_only = 1; | ||
369 | } else { | ||
370 | state_size = fx_sw_user.xstate_size; | ||
371 | xstate_bv = fx_sw_user.xstate_bv; | ||
372 | } | ||
373 | } | ||
374 | |||
375 | if (ia32_fxstate) { | ||
376 | /* | ||
377 | * For 32-bit frames with fxstate, copy the user state to the | ||
378 | * thread's fpu state, reconstruct fxstate from the fsave | ||
379 | * header. Sanitize the copied state etc. | ||
380 | */ | ||
381 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; | ||
382 | struct user_i387_ia32_struct env; | ||
383 | int err = 0; | ||
384 | |||
385 | /* | ||
386 | * Drop the current fpu, which clears used_math(). This ensures | ||
387 | * that a context switch during the copy of the new state cannot | ||
388 | * save or restore a half-copied intermediate state, which would | ||
389 | * corrupt the freshly restored state. | ||
390 | * We will be ready to restore/save the state only after | ||
391 | * set_used_math() is again set. | ||
392 | */ | ||
393 | drop_fpu(tsk); | ||
394 | |||
395 | if (__copy_from_user(xsave, buf_fx, state_size) || | ||
396 | __copy_from_user(&env, buf, sizeof(env))) { | ||
397 | err = -1; | ||
398 | } else { | ||
399 | sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); | ||
400 | set_used_math(); | ||
401 | } | ||
402 | |||
403 | if (use_eager_fpu()) | ||
404 | math_state_restore(); | ||
405 | |||
406 | return err; | ||
407 | } else { | ||
303 | /* | 408 | /* |
304 | * Encountered an error while doing the restore from the | 409 | * For 64-bit frames and 32-bit fsave frames, restore the user |
305 | * user buffer, clear the fpu state. | 410 | * state to the registers directly (with exceptions handled). |
306 | */ | 411 | */ |
307 | clear: | 412 | user_fpu_begin(); |
308 | clear_fpu(tsk); | 413 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { |
309 | clear_used_math(); | 414 | drop_init_fpu(tsk); |
415 | return -1; | ||
416 | } | ||
310 | } | 417 | } |
311 | return err; | 418 | |
419 | return 0; | ||
312 | } | 420 | } |
313 | #endif | ||
314 | 421 | ||
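Before the helpers that follow, a sketch of the user signal-frame layout that save_xstate_sig(), __restore_xstate_sig() and check_for_xstate() all agree on (32-bit frame with fxstate shown; for 64-bit frames and 32-bit fsave frames buf == buf_fx):

/*
 *  buf    -> struct i387_fsave_struct       legacy fsave header
 *  buf_fx -> struct i387_fxsave_struct      64-byte aligned
 *              .sw_reserved = fx_sw_reserved_ia32 (or fx_sw_reserved)
 *                 .magic1      = FP_XSTATE_MAGIC1
 *                 .xstate_size = xstate_size
 *            xsave header + extended state  (if use_xsave())
 *            __u32 FP_XSTATE_MAGIC2         at buf_fx + xstate_size
 *
 * check_for_xstate() trusts the extended area only after validating
 * magic1, the size bounds, and magic2 at the advertised end.
 */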
315 | /* | 422 | /* |
316 | * Prepare the SW reserved portion of the fxsave memory layout, indicating | 423 | * Prepare the SW reserved portion of the fxsave memory layout, indicating |
@@ -321,31 +428,22 @@ clear: | |||
321 | */ | 428 | */ |
322 | static void prepare_fx_sw_frame(void) | 429 | static void prepare_fx_sw_frame(void) |
323 | { | 430 | { |
324 | int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + | 431 | int fsave_header_size = sizeof(struct i387_fsave_struct); |
325 | FP_XSTATE_MAGIC2_SIZE; | 432 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; |
326 | 433 | ||
327 | sig_xstate_size = sizeof(struct _fpstate) + size_extended; | 434 | if (config_enabled(CONFIG_X86_32)) |
328 | 435 | size += fsave_header_size; | |
329 | #ifdef CONFIG_IA32_EMULATION | ||
330 | sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; | ||
331 | #endif | ||
332 | |||
333 | memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); | ||
334 | 436 | ||
335 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | 437 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; |
336 | fx_sw_reserved.extended_size = sig_xstate_size; | 438 | fx_sw_reserved.extended_size = size; |
337 | fx_sw_reserved.xstate_bv = pcntxt_mask; | 439 | fx_sw_reserved.xstate_bv = pcntxt_mask; |
338 | fx_sw_reserved.xstate_size = xstate_size; | 440 | fx_sw_reserved.xstate_size = xstate_size; |
339 | #ifdef CONFIG_IA32_EMULATION | ||
340 | memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, | ||
341 | sizeof(struct _fpx_sw_bytes)); | ||
342 | fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; | ||
343 | #endif | ||
344 | } | ||
345 | 441 | ||
346 | #ifdef CONFIG_X86_64 | 442 | if (config_enabled(CONFIG_IA32_EMULATION)) { |
347 | unsigned int sig_xstate_size = sizeof(struct _fpstate); | 443 | fx_sw_reserved_ia32 = fx_sw_reserved; |
348 | #endif | 444 | fx_sw_reserved_ia32.extended_size += fsave_header_size; |
445 | } | ||
446 | } | ||
349 | 447 | ||
350 | /* | 448 | /* |
351 | * Enable the extended processor state save/restore feature | 449 | * Enable the extended processor state save/restore feature |
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void) | |||
384 | /* | 482 | /* |
385 | * setup the xstate image representing the init state | 483 | * setup the xstate image representing the init state |
386 | */ | 484 | */ |
387 | static void __init setup_xstate_init(void) | 485 | static void __init setup_init_fpu_buf(void) |
388 | { | 486 | { |
389 | setup_xstate_features(); | ||
390 | |||
391 | /* | 487 | /* |
392 | * Setup init_xstate_buf to represent the init state of | 488 | * Setup init_xstate_buf to represent the init state of |
393 | * all the features managed by the xsave | 489 | * all the features managed by the xsave |
394 | */ | 490 | */ |
395 | init_xstate_buf = alloc_bootmem_align(xstate_size, | 491 | init_xstate_buf = alloc_bootmem_align(xstate_size, |
396 | __alignof__(struct xsave_struct)); | 492 | __alignof__(struct xsave_struct)); |
397 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 493 | fx_finit(&init_xstate_buf->i387); |
494 | |||
495 | if (!cpu_has_xsave) | ||
496 | return; | ||
497 | |||
498 | setup_xstate_features(); | ||
398 | 499 | ||
399 | clts(); | ||
400 | /* | 500 | /* |
401 | * Init all the features state with header_bv being 0x0 | 501 | * Init all the features state with header_bv being 0x0 |
402 | */ | 502 | */ |
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void) | |||
406 | * of any feature which is not represented by all zero's. | 506 | * of any feature which is not represented by all zero's. |
407 | */ | 507 | */ |
408 | xsave_state(init_xstate_buf, -1); | 508 | xsave_state(init_xstate_buf, -1); |
409 | stts(); | ||
410 | } | 509 | } |
411 | 510 | ||
511 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | ||
512 | static int __init eager_fpu_setup(char *s) | ||
513 | { | ||
514 | if (!strcmp(s, "on")) | ||
515 | eagerfpu = ENABLE; | ||
516 | else if (!strcmp(s, "off")) | ||
517 | eagerfpu = DISABLE; | ||
518 | else if (!strcmp(s, "auto")) | ||
519 | eagerfpu = AUTO; | ||
520 | return 1; | ||
521 | } | ||
522 | __setup("eagerfpu=", eager_fpu_setup); | ||
523 | |||
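The handler above gives the new eagerfpu= boot parameter three values. For example, booting with

        eagerfpu=off

on the kernel command line forces lazy FPU restore even on hardware where the xsaveopt check in the next hunk would otherwise auto-enable eager mode.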
412 | /* | 524 | /* |
413 | * Enable and initialize the xsave feature. | 525 | * Enable and initialize the xsave feature. |
414 | */ | 526 | */ |
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void) | |||
445 | 557 | ||
446 | update_regset_xstate_info(xstate_size, pcntxt_mask); | 558 | update_regset_xstate_info(xstate_size, pcntxt_mask); |
447 | prepare_fx_sw_frame(); | 559 | prepare_fx_sw_frame(); |
560 | setup_init_fpu_buf(); | ||
448 | 561 | ||
449 | setup_xstate_init(); | 562 | /* Auto enable eagerfpu for xsaveopt */ |
563 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | ||
564 | eagerfpu = ENABLE; | ||
450 | 565 | ||
451 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 566 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
452 | pcntxt_mask, xstate_size); | 567 | pcntxt_mask, xstate_size); |
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void) | |||
471 | next_func = xstate_enable; | 586 | next_func = xstate_enable; |
472 | this_func(); | 587 | this_func(); |
473 | } | 588 | } |
589 | |||
590 | static inline void __init eager_fpu_init_bp(void) | ||
591 | { | ||
592 | current->thread.fpu.state = | ||
593 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | ||
594 | if (!init_xstate_buf) | ||
595 | setup_init_fpu_buf(); | ||
596 | } | ||
597 | |||
598 | void __cpuinit eager_fpu_init(void) | ||
599 | { | ||
600 | static __refdata void (*boot_func)(void) = eager_fpu_init_bp; | ||
601 | |||
602 | clear_used_math(); | ||
603 | current_thread_info()->status = 0; | ||
604 | |||
605 | if (eagerfpu == ENABLE) | ||
606 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
607 | |||
608 | if (!cpu_has_eager_fpu) { | ||
609 | stts(); | ||
610 | return; | ||
611 | } | ||
612 | |||
613 | if (boot_func) { | ||
614 | boot_func(); | ||
615 | boot_func = NULL; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * This is same as math_state_restore(). But use_xsave() is | ||
620 | * not yet patched to use math_state_restore(). | ||
621 | */ | ||
622 | init_fpu(current); | ||
623 | __thread_fpu_begin(current); | ||
624 | if (cpu_has_xsave) | ||
625 | xrstor_state(init_xstate_buf, -1); | ||
626 | else | ||
627 | fxrstor_checking(&init_xstate_buf->i387); | ||
628 | } | ||
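To close the loop with the BUG_ON added to do_device_not_available() earlier in this diff, the contract eager_fpu_init() establishes can be summarized as:

/*
 * lazy  (!cpu_has_eager_fpu): stts() leaves CR0.TS set, so a task's
 *       first FPU instruction faults into do_device_not_available()
 *       and the state is loaded on demand.
 * eager (cpu_has_eager_fpu): TS stays clear and the init state is
 *       loaded up front via xrstor_state()/fxrstor_checking(), so the
 *       #NM trap should never fire; the new BUG_ON(use_eager_fpu())
 *       in traps.c asserts exactly that.
 */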