diff options
Diffstat (limited to 'arch/x86/kernel')
81 files changed, 4557 insertions, 4328 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b62a7667828e..88dd768eab6d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg | |||
| 12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
| 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
| 14 | CFLAGS_REMOVE_ftrace.o = -pg | 14 | CFLAGS_REMOVE_ftrace.o = -pg |
| 15 | CFLAGS_REMOVE_early_printk.o = -pg | ||
| 15 | endif | 16 | endif |
| 16 | 17 | ||
| 17 | # | 18 | # |
| @@ -23,9 +24,9 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | |||
| 23 | CFLAGS_hpet.o := $(nostackp) | 24 | CFLAGS_hpet.o := $(nostackp) |
| 24 | CFLAGS_tsc.o := $(nostackp) | 25 | CFLAGS_tsc.o := $(nostackp) |
| 25 | 26 | ||
| 26 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | 27 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
| 27 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 28 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
| 28 | obj-y += time_$(BITS).o ioport.o ldt.o | 29 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o |
| 29 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 30 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o |
| 30 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 31 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
| 31 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 32 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
| @@ -65,6 +66,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | |||
| 65 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 66 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
| 66 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 67 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
| 67 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 68 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
| 69 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | ||
| 68 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 70 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o |
| 69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 71 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
| 70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 72 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
| @@ -105,6 +107,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | |||
| 105 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | 107 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o |
| 106 | obj-$(CONFIG_MICROCODE) += microcode.o | 108 | obj-$(CONFIG_MICROCODE) += microcode.o |
| 107 | 109 | ||
| 110 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | ||
| 111 | |||
| 108 | ### | 112 | ### |
| 109 | # 64 bit specific files | 113 | # 64 bit specific files |
| 110 | ifeq ($(CONFIG_X86_64),y) | 114 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4c51a2f8fd31..65d0b72777ea 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
| @@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void) | |||
| 1360 | disable_acpi(); | 1360 | disable_acpi(); |
| 1361 | } | 1361 | } |
| 1362 | } | 1362 | } |
| 1363 | |||
| 1364 | /* | ||
| 1365 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | ||
| 1366 | * processors, where MPS only supports physical. | ||
| 1367 | */ | ||
| 1368 | if (acpi_lapic && acpi_ioapic) | ||
| 1369 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
| 1370 | "information\n"); | ||
| 1371 | else if (acpi_lapic) | ||
| 1372 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | ||
| 1373 | "configuration information\n"); | ||
| 1363 | #endif | 1374 | #endif |
| 1364 | return; | 1375 | return; |
| 1365 | } | 1376 | } |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0a60d60ed036..2e2da717b350 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/iommu-helper.h> | 24 | #include <linux/iommu-helper.h> |
| 25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
| 26 | #include <asm/iommu.h> | 26 | #include <asm/iommu.h> |
| 27 | #include <asm/gart.h> | ||
| 27 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
| 28 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
| 29 | 30 | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c6cc22815d35..c625800c55ca 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
| 29 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
| 30 | #include <asm/iommu.h> | 30 | #include <asm/iommu.h> |
| 31 | #include <asm/gart.h> | ||
| 31 | 32 | ||
| 32 | /* | 33 | /* |
| 33 | * definitions for the ACPI scanning code | 34 | * definitions for the ACPI scanning code |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9a32b37ee2ee..676debfc1702 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
| @@ -1,8 +1,9 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Firmware replacement code. | 2 | * Firmware replacement code. |
| 3 | * | 3 | * |
| 4 | * Work around broken BIOSes that don't set an aperture or only set the | 4 | * Work around broken BIOSes that don't set an aperture, only set the |
| 5 | * aperture in the AGP bridge. | 5 | * aperture in the AGP bridge, or set too small aperture. |
| 6 | * | ||
| 6 | * If all fails map the aperture over some low memory. This is cheaper than | 7 | * If all fails map the aperture over some low memory. This is cheaper than |
| 7 | * doing bounce buffering. The memory is lost. This is done at early boot | 8 | * doing bounce buffering. The memory is lost. This is done at early boot |
| 8 | * because only the bootmem allocator can allocate 32+MB. | 9 | * because only the bootmem allocator can allocate 32+MB. |
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b525..b5229affb953 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
| 31 | #include <linux/dmi.h> | 31 | #include <linux/dmi.h> |
| 32 | #include <linux/dmar.h> | 32 | #include <linux/dmar.h> |
| 33 | #include <linux/ftrace.h> | ||
| 33 | 34 | ||
| 34 | #include <asm/atomic.h> | 35 | #include <asm/atomic.h> |
| 35 | #include <asm/smp.h> | 36 | #include <asm/smp.h> |
| @@ -441,6 +442,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 441 | v = apic_read(APIC_LVTT); | 442 | v = apic_read(APIC_LVTT); |
| 442 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 443 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
| 443 | apic_write(APIC_LVTT, v); | 444 | apic_write(APIC_LVTT, v); |
| 445 | apic_write(APIC_TMICT, 0xffffffff); | ||
| 444 | break; | 446 | break; |
| 445 | case CLOCK_EVT_MODE_RESUME: | 447 | case CLOCK_EVT_MODE_RESUME: |
| 446 | /* Nothing to do here */ | 448 | /* Nothing to do here */ |
| @@ -559,13 +561,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
| 559 | } else { | 561 | } else { |
| 560 | res = (((u64)deltapm) * mult) >> 22; | 562 | res = (((u64)deltapm) * mult) >> 22; |
| 561 | do_div(res, 1000000); | 563 | do_div(res, 1000000); |
| 562 | printk(KERN_WARNING "APIC calibration not consistent " | 564 | pr_warning("APIC calibration not consistent " |
| 563 | "with PM Timer: %ldms instead of 100ms\n", | 565 | "with PM Timer: %ldms instead of 100ms\n", |
| 564 | (long)res); | 566 | (long)res); |
| 565 | /* Correct the lapic counter value */ | 567 | /* Correct the lapic counter value */ |
| 566 | res = (((u64)(*delta)) * pm_100ms); | 568 | res = (((u64)(*delta)) * pm_100ms); |
| 567 | do_div(res, deltapm); | 569 | do_div(res, deltapm); |
| 568 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | 570 | pr_info("APIC delta adjusted to PM-Timer: " |
| 569 | "%lu (%ld)\n", (unsigned long)res, *delta); | 571 | "%lu (%ld)\n", (unsigned long)res, *delta); |
| 570 | *delta = (long)res; | 572 | *delta = (long)res; |
| 571 | } | 573 | } |
| @@ -645,8 +647,7 @@ static int __init calibrate_APIC_clock(void) | |||
| 645 | */ | 647 | */ |
| 646 | if (calibration_result < (1000000 / HZ)) { | 648 | if (calibration_result < (1000000 / HZ)) { |
| 647 | local_irq_enable(); | 649 | local_irq_enable(); |
| 648 | printk(KERN_WARNING | 650 | pr_warning("APIC frequency too slow, disabling apic timer\n"); |
| 649 | "APIC frequency too slow, disabling apic timer\n"); | ||
| 650 | return -1; | 651 | return -1; |
| 651 | } | 652 | } |
| 652 | 653 | ||
| @@ -672,13 +673,9 @@ static int __init calibrate_APIC_clock(void) | |||
| 672 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) | 673 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) |
| 673 | cpu_relax(); | 674 | cpu_relax(); |
| 674 | 675 | ||
| 675 | local_irq_disable(); | ||
| 676 | |||
| 677 | /* Stop the lapic timer */ | 676 | /* Stop the lapic timer */ |
| 678 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); | 677 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); |
| 679 | 678 | ||
| 680 | local_irq_enable(); | ||
| 681 | |||
| 682 | /* Jiffies delta */ | 679 | /* Jiffies delta */ |
| 683 | deltaj = lapic_cal_j2 - lapic_cal_j1; | 680 | deltaj = lapic_cal_j2 - lapic_cal_j1; |
| 684 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); | 681 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); |
| @@ -692,8 +689,7 @@ static int __init calibrate_APIC_clock(void) | |||
| 692 | local_irq_enable(); | 689 | local_irq_enable(); |
| 693 | 690 | ||
| 694 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { | 691 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { |
| 695 | printk(KERN_WARNING | 692 | pr_warning("APIC timer disabled due to verification failure.\n"); |
| 696 | "APIC timer disabled due to verification failure.\n"); | ||
| 697 | return -1; | 693 | return -1; |
| 698 | } | 694 | } |
| 699 | 695 | ||
| @@ -714,7 +710,7 @@ void __init setup_boot_APIC_clock(void) | |||
| 714 | * broadcast mechanism is used. On UP systems simply ignore it. | 710 | * broadcast mechanism is used. On UP systems simply ignore it. |
| 715 | */ | 711 | */ |
| 716 | if (disable_apic_timer) { | 712 | if (disable_apic_timer) { |
| 717 | printk(KERN_INFO "Disabling APIC timer\n"); | 713 | pr_info("Disabling APIC timer\n"); |
| 718 | /* No broadcast on UP ! */ | 714 | /* No broadcast on UP ! */ |
| 719 | if (num_possible_cpus() > 1) { | 715 | if (num_possible_cpus() > 1) { |
| 720 | lapic_clockevent.mult = 1; | 716 | lapic_clockevent.mult = 1; |
| @@ -741,7 +737,7 @@ void __init setup_boot_APIC_clock(void) | |||
| 741 | if (nmi_watchdog != NMI_IO_APIC) | 737 | if (nmi_watchdog != NMI_IO_APIC) |
| 742 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | 738 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; |
| 743 | else | 739 | else |
| 744 | printk(KERN_WARNING "APIC timer registered as dummy," | 740 | pr_warning("APIC timer registered as dummy," |
| 745 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | 741 | " due to nmi_watchdog=%d!\n", nmi_watchdog); |
| 746 | 742 | ||
| 747 | /* Setup the lapic or request the broadcast */ | 743 | /* Setup the lapic or request the broadcast */ |
| @@ -773,8 +769,7 @@ static void local_apic_timer_interrupt(void) | |||
| 773 | * spurious. | 769 | * spurious. |
| 774 | */ | 770 | */ |
| 775 | if (!evt->event_handler) { | 771 | if (!evt->event_handler) { |
| 776 | printk(KERN_WARNING | 772 | pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); |
| 777 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
| 778 | /* Switch it off */ | 773 | /* Switch it off */ |
| 779 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | 774 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); |
| 780 | return; | 775 | return; |
| @@ -783,11 +778,7 @@ static void local_apic_timer_interrupt(void) | |||
| 783 | /* | 778 | /* |
| 784 | * the NMI deadlock-detector uses this. | 779 | * the NMI deadlock-detector uses this. |
| 785 | */ | 780 | */ |
| 786 | #ifdef CONFIG_X86_64 | 781 | inc_irq_stat(apic_timer_irqs); |
| 787 | add_pda(apic_timer_irqs, 1); | ||
| 788 | #else | ||
| 789 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
| 790 | #endif | ||
| 791 | 782 | ||
| 792 | evt->event_handler(evt); | 783 | evt->event_handler(evt); |
| 793 | } | 784 | } |
| @@ -800,7 +791,7 @@ static void local_apic_timer_interrupt(void) | |||
| 800 | * [ if a single-CPU system runs an SMP kernel then we call the local | 791 | * [ if a single-CPU system runs an SMP kernel then we call the local |
| 801 | * interrupt as well. Thus we cannot inline the local irq ... ] | 792 | * interrupt as well. Thus we cannot inline the local irq ... ] |
| 802 | */ | 793 | */ |
| 803 | void smp_apic_timer_interrupt(struct pt_regs *regs) | 794 | void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) |
| 804 | { | 795 | { |
| 805 | struct pt_regs *old_regs = set_irq_regs(regs); | 796 | struct pt_regs *old_regs = set_irq_regs(regs); |
| 806 | 797 | ||
| @@ -814,9 +805,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
| 814 | * Besides, if we don't timer interrupts ignore the global | 805 | * Besides, if we don't timer interrupts ignore the global |
| 815 | * interrupt lock, which is the WrongThing (tm) to do. | 806 | * interrupt lock, which is the WrongThing (tm) to do. |
| 816 | */ | 807 | */ |
| 817 | #ifdef CONFIG_X86_64 | ||
| 818 | exit_idle(); | 808 | exit_idle(); |
| 819 | #endif | ||
| 820 | irq_enter(); | 809 | irq_enter(); |
| 821 | local_apic_timer_interrupt(); | 810 | local_apic_timer_interrupt(); |
| 822 | irq_exit(); | 811 | irq_exit(); |
| @@ -1093,7 +1082,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 1093 | unsigned int oldvalue, value, maxlvt; | 1082 | unsigned int oldvalue, value, maxlvt; |
| 1094 | 1083 | ||
| 1095 | if (!lapic_is_integrated()) { | 1084 | if (!lapic_is_integrated()) { |
| 1096 | printk(KERN_INFO "No ESR for 82489DX.\n"); | 1085 | pr_info("No ESR for 82489DX.\n"); |
| 1097 | return; | 1086 | return; |
| 1098 | } | 1087 | } |
| 1099 | 1088 | ||
| @@ -1104,7 +1093,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 1104 | * ESR disabled - we can't do anything useful with the | 1093 | * ESR disabled - we can't do anything useful with the |
| 1105 | * errors anyway - mbligh | 1094 | * errors anyway - mbligh |
| 1106 | */ | 1095 | */ |
| 1107 | printk(KERN_INFO "Leaving ESR disabled.\n"); | 1096 | pr_info("Leaving ESR disabled.\n"); |
| 1108 | return; | 1097 | return; |
| 1109 | } | 1098 | } |
| 1110 | 1099 | ||
| @@ -1298,7 +1287,7 @@ void check_x2apic(void) | |||
| 1298 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1287 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
| 1299 | 1288 | ||
| 1300 | if (msr & X2APIC_ENABLE) { | 1289 | if (msr & X2APIC_ENABLE) { |
| 1301 | printk("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1290 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); |
| 1302 | x2apic_preenabled = x2apic = 1; | 1291 | x2apic_preenabled = x2apic = 1; |
| 1303 | apic_ops = &x2apic_ops; | 1292 | apic_ops = &x2apic_ops; |
| 1304 | } | 1293 | } |
| @@ -1310,7 +1299,7 @@ void enable_x2apic(void) | |||
| 1310 | 1299 | ||
| 1311 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1300 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
| 1312 | if (!(msr & X2APIC_ENABLE)) { | 1301 | if (!(msr & X2APIC_ENABLE)) { |
| 1313 | printk("Enabling x2apic\n"); | 1302 | pr_info("Enabling x2apic\n"); |
| 1314 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1303 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
| 1315 | } | 1304 | } |
| 1316 | } | 1305 | } |
| @@ -1325,9 +1314,8 @@ void __init enable_IR_x2apic(void) | |||
| 1325 | return; | 1314 | return; |
| 1326 | 1315 | ||
| 1327 | if (!x2apic_preenabled && disable_x2apic) { | 1316 | if (!x2apic_preenabled && disable_x2apic) { |
| 1328 | printk(KERN_INFO | 1317 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
| 1329 | "Skipped enabling x2apic and Interrupt-remapping " | 1318 | "because of nox2apic\n"); |
| 1330 | "because of nox2apic\n"); | ||
| 1331 | return; | 1319 | return; |
| 1332 | } | 1320 | } |
| 1333 | 1321 | ||
| @@ -1335,22 +1323,19 @@ void __init enable_IR_x2apic(void) | |||
| 1335 | panic("Bios already enabled x2apic, can't enforce nox2apic"); | 1323 | panic("Bios already enabled x2apic, can't enforce nox2apic"); |
| 1336 | 1324 | ||
| 1337 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1325 | if (!x2apic_preenabled && skip_ioapic_setup) { |
| 1338 | printk(KERN_INFO | 1326 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
| 1339 | "Skipped enabling x2apic and Interrupt-remapping " | 1327 | "because of skipping io-apic setup\n"); |
| 1340 | "because of skipping io-apic setup\n"); | ||
| 1341 | return; | 1328 | return; |
| 1342 | } | 1329 | } |
| 1343 | 1330 | ||
| 1344 | ret = dmar_table_init(); | 1331 | ret = dmar_table_init(); |
| 1345 | if (ret) { | 1332 | if (ret) { |
| 1346 | printk(KERN_INFO | 1333 | pr_info("dmar_table_init() failed with %d:\n", ret); |
| 1347 | "dmar_table_init() failed with %d:\n", ret); | ||
| 1348 | 1334 | ||
| 1349 | if (x2apic_preenabled) | 1335 | if (x2apic_preenabled) |
| 1350 | panic("x2apic enabled by bios. But IR enabling failed"); | 1336 | panic("x2apic enabled by bios. But IR enabling failed"); |
| 1351 | else | 1337 | else |
| 1352 | printk(KERN_INFO | 1338 | pr_info("Not enabling x2apic,Intr-remapping\n"); |
| 1353 | "Not enabling x2apic,Intr-remapping\n"); | ||
| 1354 | return; | 1339 | return; |
| 1355 | } | 1340 | } |
| 1356 | 1341 | ||
| @@ -1359,7 +1344,7 @@ void __init enable_IR_x2apic(void) | |||
| 1359 | 1344 | ||
| 1360 | ret = save_mask_IO_APIC_setup(); | 1345 | ret = save_mask_IO_APIC_setup(); |
| 1361 | if (ret) { | 1346 | if (ret) { |
| 1362 | printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); | 1347 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
| 1363 | goto end; | 1348 | goto end; |
| 1364 | } | 1349 | } |
| 1365 | 1350 | ||
| @@ -1394,14 +1379,11 @@ end: | |||
| 1394 | 1379 | ||
| 1395 | if (!ret) { | 1380 | if (!ret) { |
| 1396 | if (!x2apic_preenabled) | 1381 | if (!x2apic_preenabled) |
| 1397 | printk(KERN_INFO | 1382 | pr_info("Enabled x2apic and interrupt-remapping\n"); |
| 1398 | "Enabled x2apic and interrupt-remapping\n"); | ||
| 1399 | else | 1383 | else |
| 1400 | printk(KERN_INFO | 1384 | pr_info("Enabled Interrupt-remapping\n"); |
| 1401 | "Enabled Interrupt-remapping\n"); | ||
| 1402 | } else | 1385 | } else |
| 1403 | printk(KERN_ERR | 1386 | pr_err("Failed to enable Interrupt-remapping and x2apic\n"); |
| 1404 | "Failed to enable Interrupt-remapping and x2apic\n"); | ||
| 1405 | #else | 1387 | #else |
| 1406 | if (!cpu_has_x2apic) | 1388 | if (!cpu_has_x2apic) |
| 1407 | return; | 1389 | return; |
| @@ -1410,8 +1392,8 @@ end: | |||
| 1410 | panic("x2apic enabled prior OS handover," | 1392 | panic("x2apic enabled prior OS handover," |
| 1411 | " enable CONFIG_INTR_REMAP"); | 1393 | " enable CONFIG_INTR_REMAP"); |
| 1412 | 1394 | ||
| 1413 | printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " | 1395 | pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " |
| 1414 | " and x2apic\n"); | 1396 | " and x2apic\n"); |
| 1415 | #endif | 1397 | #endif |
| 1416 | 1398 | ||
| 1417 | return; | 1399 | return; |
| @@ -1428,7 +1410,7 @@ end: | |||
| 1428 | static int __init detect_init_APIC(void) | 1410 | static int __init detect_init_APIC(void) |
| 1429 | { | 1411 | { |
| 1430 | if (!cpu_has_apic) { | 1412 | if (!cpu_has_apic) { |
| 1431 | printk(KERN_INFO "No local APIC present\n"); | 1413 | pr_info("No local APIC present\n"); |
| 1432 | return -1; | 1414 | return -1; |
| 1433 | } | 1415 | } |
| 1434 | 1416 | ||
| @@ -1469,8 +1451,8 @@ static int __init detect_init_APIC(void) | |||
| 1469 | * "lapic" specified. | 1451 | * "lapic" specified. |
| 1470 | */ | 1452 | */ |
| 1471 | if (!force_enable_local_apic) { | 1453 | if (!force_enable_local_apic) { |
| 1472 | printk(KERN_INFO "Local APIC disabled by BIOS -- " | 1454 | pr_info("Local APIC disabled by BIOS -- " |
| 1473 | "you can enable it with \"lapic\"\n"); | 1455 | "you can enable it with \"lapic\"\n"); |
| 1474 | return -1; | 1456 | return -1; |
| 1475 | } | 1457 | } |
| 1476 | /* | 1458 | /* |
| @@ -1480,8 +1462,7 @@ static int __init detect_init_APIC(void) | |||
| 1480 | */ | 1462 | */ |
| 1481 | rdmsr(MSR_IA32_APICBASE, l, h); | 1463 | rdmsr(MSR_IA32_APICBASE, l, h); |
| 1482 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1464 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
| 1483 | printk(KERN_INFO | 1465 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); |
| 1484 | "Local APIC disabled by BIOS -- reenabling.\n"); | ||
| 1485 | l &= ~MSR_IA32_APICBASE_BASE; | 1466 | l &= ~MSR_IA32_APICBASE_BASE; |
| 1486 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1467 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; |
| 1487 | wrmsr(MSR_IA32_APICBASE, l, h); | 1468 | wrmsr(MSR_IA32_APICBASE, l, h); |
| @@ -1494,7 +1475,7 @@ static int __init detect_init_APIC(void) | |||
| 1494 | */ | 1475 | */ |
| 1495 | features = cpuid_edx(1); | 1476 | features = cpuid_edx(1); |
| 1496 | if (!(features & (1 << X86_FEATURE_APIC))) { | 1477 | if (!(features & (1 << X86_FEATURE_APIC))) { |
| 1497 | printk(KERN_WARNING "Could not enable APIC!\n"); | 1478 | pr_warning("Could not enable APIC!\n"); |
| 1498 | return -1; | 1479 | return -1; |
| 1499 | } | 1480 | } |
| 1500 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1481 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
| @@ -1505,14 +1486,14 @@ static int __init detect_init_APIC(void) | |||
| 1505 | if (l & MSR_IA32_APICBASE_ENABLE) | 1486 | if (l & MSR_IA32_APICBASE_ENABLE) |
| 1506 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | 1487 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; |
| 1507 | 1488 | ||
| 1508 | printk(KERN_INFO "Found and enabled local APIC!\n"); | 1489 | pr_info("Found and enabled local APIC!\n"); |
| 1509 | 1490 | ||
| 1510 | apic_pm_activate(); | 1491 | apic_pm_activate(); |
| 1511 | 1492 | ||
| 1512 | return 0; | 1493 | return 0; |
| 1513 | 1494 | ||
| 1514 | no_apic: | 1495 | no_apic: |
| 1515 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); | 1496 | pr_info("No local APIC present or hardware disabled\n"); |
| 1516 | return -1; | 1497 | return -1; |
| 1517 | } | 1498 | } |
| 1518 | #endif | 1499 | #endif |
| @@ -1588,12 +1569,12 @@ int __init APIC_init_uniprocessor(void) | |||
| 1588 | { | 1569 | { |
| 1589 | #ifdef CONFIG_X86_64 | 1570 | #ifdef CONFIG_X86_64 |
| 1590 | if (disable_apic) { | 1571 | if (disable_apic) { |
| 1591 | printk(KERN_INFO "Apic disabled\n"); | 1572 | pr_info("Apic disabled\n"); |
| 1592 | return -1; | 1573 | return -1; |
| 1593 | } | 1574 | } |
| 1594 | if (!cpu_has_apic) { | 1575 | if (!cpu_has_apic) { |
| 1595 | disable_apic = 1; | 1576 | disable_apic = 1; |
| 1596 | printk(KERN_INFO "Apic disabled by BIOS\n"); | 1577 | pr_info("Apic disabled by BIOS\n"); |
| 1597 | return -1; | 1578 | return -1; |
| 1598 | } | 1579 | } |
| 1599 | #else | 1580 | #else |
| @@ -1605,8 +1586,8 @@ int __init APIC_init_uniprocessor(void) | |||
| 1605 | */ | 1586 | */ |
| 1606 | if (!cpu_has_apic && | 1587 | if (!cpu_has_apic && |
| 1607 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1588 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
| 1608 | printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", | 1589 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", |
| 1609 | boot_cpu_physical_apicid); | 1590 | boot_cpu_physical_apicid); |
| 1610 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1591 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
| 1611 | return -1; | 1592 | return -1; |
| 1612 | } | 1593 | } |
| @@ -1682,9 +1663,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
| 1682 | { | 1663 | { |
| 1683 | u32 v; | 1664 | u32 v; |
| 1684 | 1665 | ||
| 1685 | #ifdef CONFIG_X86_64 | ||
| 1686 | exit_idle(); | 1666 | exit_idle(); |
| 1687 | #endif | ||
| 1688 | irq_enter(); | 1667 | irq_enter(); |
| 1689 | /* | 1668 | /* |
| 1690 | * Check if this really is a spurious interrupt and ACK it | 1669 | * Check if this really is a spurious interrupt and ACK it |
| @@ -1695,14 +1674,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
| 1695 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | 1674 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) |
| 1696 | ack_APIC_irq(); | 1675 | ack_APIC_irq(); |
| 1697 | 1676 | ||
| 1698 | #ifdef CONFIG_X86_64 | 1677 | inc_irq_stat(irq_spurious_count); |
| 1699 | add_pda(irq_spurious_count, 1); | 1678 | |
| 1700 | #else | ||
| 1701 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1679 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
| 1702 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " | 1680 | pr_info("spurious APIC interrupt on CPU#%d, " |
| 1703 | "should never happen.\n", smp_processor_id()); | 1681 | "should never happen.\n", smp_processor_id()); |
| 1704 | __get_cpu_var(irq_stat).irq_spurious_count++; | ||
| 1705 | #endif | ||
| 1706 | irq_exit(); | 1682 | irq_exit(); |
| 1707 | } | 1683 | } |
| 1708 | 1684 | ||
| @@ -1713,9 +1689,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
| 1713 | { | 1689 | { |
| 1714 | u32 v, v1; | 1690 | u32 v, v1; |
| 1715 | 1691 | ||
| 1716 | #ifdef CONFIG_X86_64 | ||
| 1717 | exit_idle(); | 1692 | exit_idle(); |
| 1718 | #endif | ||
| 1719 | irq_enter(); | 1693 | irq_enter(); |
| 1720 | /* First tickle the hardware, only then report what went on. -- REW */ | 1694 | /* First tickle the hardware, only then report what went on. -- REW */ |
| 1721 | v = apic_read(APIC_ESR); | 1695 | v = apic_read(APIC_ESR); |
| @@ -1724,17 +1698,18 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
| 1724 | ack_APIC_irq(); | 1698 | ack_APIC_irq(); |
| 1725 | atomic_inc(&irq_err_count); | 1699 | atomic_inc(&irq_err_count); |
| 1726 | 1700 | ||
| 1727 | /* Here is what the APIC error bits mean: | 1701 | /* |
| 1728 | 0: Send CS error | 1702 | * Here is what the APIC error bits mean: |
| 1729 | 1: Receive CS error | 1703 | * 0: Send CS error |
| 1730 | 2: Send accept error | 1704 | * 1: Receive CS error |
| 1731 | 3: Receive accept error | 1705 | * 2: Send accept error |
| 1732 | 4: Reserved | 1706 | * 3: Receive accept error |
| 1733 | 5: Send illegal vector | 1707 | * 4: Reserved |
| 1734 | 6: Received illegal vector | 1708 | * 5: Send illegal vector |
| 1735 | 7: Illegal register address | 1709 | * 6: Received illegal vector |
| 1736 | */ | 1710 | * 7: Illegal register address |
| 1737 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | 1711 | */ |
| 1712 | pr_debug("APIC error on CPU%d: %02x(%02x)\n", | ||
| 1738 | smp_processor_id(), v , v1); | 1713 | smp_processor_id(), v , v1); |
| 1739 | irq_exit(); | 1714 | irq_exit(); |
| 1740 | } | 1715 | } |
| @@ -1838,15 +1813,15 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1838 | * Validate version | 1813 | * Validate version |
| 1839 | */ | 1814 | */ |
| 1840 | if (version == 0x0) { | 1815 | if (version == 0x0) { |
| 1841 | printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " | 1816 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " |
| 1842 | "fixing up to 0x10. (tell your hw vendor)\n", | 1817 | "fixing up to 0x10. (tell your hw vendor)\n", |
| 1843 | version); | 1818 | version); |
| 1844 | version = 0x10; | 1819 | version = 0x10; |
| 1845 | } | 1820 | } |
| 1846 | apic_version[apicid] = version; | 1821 | apic_version[apicid] = version; |
| 1847 | 1822 | ||
| 1848 | if (num_processors >= NR_CPUS) { | 1823 | if (num_processors >= NR_CPUS) { |
| 1849 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1824 | pr_warning("WARNING: NR_CPUS limit of %i reached." |
| 1850 | " Processor ignored.\n", NR_CPUS); | 1825 | " Processor ignored.\n", NR_CPUS); |
| 1851 | return; | 1826 | return; |
| 1852 | } | 1827 | } |
| @@ -2209,7 +2184,7 @@ static int __init apic_set_verbosity(char *arg) | |||
| 2209 | else if (strcmp("verbose", arg) == 0) | 2184 | else if (strcmp("verbose", arg) == 0) |
| 2210 | apic_verbosity = APIC_VERBOSE; | 2185 | apic_verbosity = APIC_VERBOSE; |
| 2211 | else { | 2186 | else { |
| 2212 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | 2187 | pr_warning("APIC Verbosity level %s not recognised" |
| 2213 | " use apic=verbose or apic=debug\n", arg); | 2188 | " use apic=verbose or apic=debug\n", arg); |
| 2214 | return -EINVAL; | 2189 | return -EINVAL; |
| 2215 | } | 2190 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5145a6e72bbb..3a26525a3f31 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -391,11 +391,7 @@ static int power_off; | |||
| 391 | #else | 391 | #else |
| 392 | static int power_off = 1; | 392 | static int power_off = 1; |
| 393 | #endif | 393 | #endif |
| 394 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF | ||
| 395 | static int realmode_power_off = 1; | ||
| 396 | #else | ||
| 397 | static int realmode_power_off; | 394 | static int realmode_power_off; |
| 398 | #endif | ||
| 399 | #ifdef CONFIG_APM_ALLOW_INTS | 395 | #ifdef CONFIG_APM_ALLOW_INTS |
| 400 | static int allow_ints = 1; | 396 | static int allow_ints = 1; |
| 401 | #else | 397 | #else |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6649d09ad88f..ee4df08feee6 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
| 12 | #include <linux/kbuild.h> | 12 | #include <linux/kbuild.h> |
| 13 | #include <asm/ucontext.h> | 13 | #include <asm/ucontext.h> |
| 14 | #include "sigframe.h" | 14 | #include <asm/sigframe.h> |
| 15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
| 16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
| 17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 7fcf63d22f8b..1d41d3f1edbc 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
| @@ -20,6 +20,8 @@ | |||
| 20 | 20 | ||
| 21 | #include <xen/interface/xen.h> | 21 | #include <xen/interface/xen.h> |
| 22 | 22 | ||
| 23 | #include <asm/sigframe.h> | ||
| 24 | |||
| 23 | #define __NO_STUBS 1 | 25 | #define __NO_STUBS 1 |
| 24 | #undef __SYSCALL | 26 | #undef __SYSCALL |
| 25 | #undef _ASM_X86_UNISTD_64_H | 27 | #undef _ASM_X86_UNISTD_64_H |
| @@ -87,7 +89,7 @@ int main(void) | |||
| 87 | BLANK(); | 89 | BLANK(); |
| 88 | #undef ENTRY | 90 | #undef ENTRY |
| 89 | DEFINE(IA32_RT_SIGFRAME_sigcontext, | 91 | DEFINE(IA32_RT_SIGFRAME_sigcontext, |
| 90 | offsetof (struct rt_sigframe32, uc.uc_mcontext)); | 92 | offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); |
| 91 | BLANK(); | 93 | BLANK(); |
| 92 | #endif | 94 | #endif |
| 93 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 95 | DEFINE(pbe_address, offsetof(struct pbe, address)); |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index f0dfe6f17e7e..2a0a2a3cac26 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
| @@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | |||
| 69 | 69 | ||
| 70 | long sn_partition_id; | 70 | long sn_partition_id; |
| 71 | EXPORT_SYMBOL_GPL(sn_partition_id); | 71 | EXPORT_SYMBOL_GPL(sn_partition_id); |
| 72 | long uv_coherency_id; | 72 | long sn_coherency_id; |
| 73 | EXPORT_SYMBOL_GPL(uv_coherency_id); | 73 | EXPORT_SYMBOL_GPL(sn_coherency_id); |
| 74 | long uv_region_size; | 74 | long sn_region_size; |
| 75 | EXPORT_SYMBOL_GPL(uv_region_size); | 75 | EXPORT_SYMBOL_GPL(sn_region_size); |
| 76 | int uv_type; | 76 | int uv_type; |
| 77 | 77 | ||
| 78 | 78 | ||
| @@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
| 100 | return ret; | 100 | return ret; |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | int | ||
| 104 | uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, | ||
| 105 | unsigned long *intr_mmr_offset) | ||
| 106 | { | ||
| 107 | union uv_watchlist_u size_blade; | ||
| 108 | u64 watchlist; | ||
| 109 | s64 ret; | ||
| 110 | |||
| 111 | size_blade.size = mq_size; | ||
| 112 | size_blade.blade = blade; | ||
| 113 | |||
| 114 | /* | ||
| 115 | * bios returns watchlist number or negative error number. | ||
| 116 | */ | ||
| 117 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | ||
| 118 | size_blade.val, (u64)intr_mmr_offset, | ||
| 119 | (u64)&watchlist, 0); | ||
| 120 | if (ret < BIOS_STATUS_SUCCESS) | ||
| 121 | return ret; | ||
| 122 | |||
| 123 | return watchlist; | ||
| 124 | } | ||
| 125 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); | ||
| 126 | |||
| 127 | int | ||
| 128 | uv_bios_mq_watchlist_free(int blade, int watchlist_num) | ||
| 129 | { | ||
| 130 | return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, | ||
| 131 | blade, watchlist_num, 0, 0, 0); | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); | ||
| 134 | |||
| 135 | s64 | ||
| 136 | uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) | ||
| 137 | { | ||
| 138 | return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, | ||
| 139 | perms, 0, 0); | ||
| 140 | } | ||
| 141 | EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); | ||
| 142 | |||
| 143 | s64 | ||
| 144 | uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) | ||
| 145 | { | ||
| 146 | s64 ret; | ||
| 147 | |||
| 148 | ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, | ||
| 149 | (u64)addr, buf, (u64)len, 0); | ||
| 150 | return ret; | ||
| 151 | } | ||
| 152 | EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); | ||
| 103 | 153 | ||
| 104 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | 154 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) |
| 105 | { | 155 | { |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c new file mode 100644 index 000000000000..2ac0ab71412a --- /dev/null +++ b/arch/x86/kernel/check.c | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | #include <linux/module.h> | ||
| 2 | #include <linux/sched.h> | ||
| 3 | #include <linux/kthread.h> | ||
| 4 | #include <linux/workqueue.h> | ||
| 5 | #include <asm/e820.h> | ||
| 6 | #include <asm/proto.h> | ||
| 7 | |||
| 8 | /* | ||
| 9 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
| 10 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
| 11 | * remaining free memory in that area and fill it with a distinct | ||
| 12 | * pattern. | ||
| 13 | */ | ||
| 14 | #define MAX_SCAN_AREAS 8 | ||
| 15 | |||
| 16 | static int __read_mostly memory_corruption_check = -1; | ||
| 17 | |||
| 18 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
| 19 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
| 20 | |||
| 21 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
| 22 | static int num_scan_areas; | ||
| 23 | |||
| 24 | |||
| 25 | static __init int set_corruption_check(char *arg) | ||
| 26 | { | ||
| 27 | char *end; | ||
| 28 | |||
| 29 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
| 30 | |||
| 31 | return (*end == 0) ? 0 : -EINVAL; | ||
| 32 | } | ||
| 33 | early_param("memory_corruption_check", set_corruption_check); | ||
| 34 | |||
| 35 | static __init int set_corruption_check_period(char *arg) | ||
| 36 | { | ||
| 37 | char *end; | ||
| 38 | |||
| 39 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
| 40 | |||
| 41 | return (*end == 0) ? 0 : -EINVAL; | ||
| 42 | } | ||
| 43 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
| 44 | |||
| 45 | static __init int set_corruption_check_size(char *arg) | ||
| 46 | { | ||
| 47 | char *end; | ||
| 48 | unsigned size; | ||
| 49 | |||
| 50 | size = memparse(arg, &end); | ||
| 51 | |||
| 52 | if (*end == '\0') | ||
| 53 | corruption_check_size = size; | ||
| 54 | |||
| 55 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
| 56 | } | ||
| 57 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
| 58 | |||
| 59 | |||
| 60 | void __init setup_bios_corruption_check(void) | ||
| 61 | { | ||
| 62 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
| 63 | |||
| 64 | if (memory_corruption_check == -1) { | ||
| 65 | memory_corruption_check = | ||
| 66 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
| 67 | 1 | ||
| 68 | #else | ||
| 69 | 0 | ||
| 70 | #endif | ||
| 71 | ; | ||
| 72 | } | ||
| 73 | |||
| 74 | if (corruption_check_size == 0) | ||
| 75 | memory_corruption_check = 0; | ||
| 76 | |||
| 77 | if (!memory_corruption_check) | ||
| 78 | return; | ||
| 79 | |||
| 80 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
| 81 | |||
| 82 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | ||
| 83 | u64 size; | ||
| 84 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
| 85 | |||
| 86 | if (addr == 0) | ||
| 87 | break; | ||
| 88 | |||
| 89 | if ((addr + size) > corruption_check_size) | ||
| 90 | size = corruption_check_size - addr; | ||
| 91 | |||
| 92 | if (size == 0) | ||
| 93 | break; | ||
| 94 | |||
| 95 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
| 96 | scan_areas[num_scan_areas].addr = addr; | ||
| 97 | scan_areas[num_scan_areas].size = size; | ||
| 98 | num_scan_areas++; | ||
| 99 | |||
| 100 | /* Assume we've already mapped this early memory */ | ||
| 101 | memset(__va(addr), 0, size); | ||
| 102 | |||
| 103 | addr += size; | ||
| 104 | } | ||
| 105 | |||
| 106 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
| 107 | num_scan_areas); | ||
| 108 | update_e820(); | ||
| 109 | } | ||
| 110 | |||
| 111 | |||
| 112 | void check_for_bios_corruption(void) | ||
| 113 | { | ||
| 114 | int i; | ||
| 115 | int corruption = 0; | ||
| 116 | |||
| 117 | if (!memory_corruption_check) | ||
| 118 | return; | ||
| 119 | |||
| 120 | for (i = 0; i < num_scan_areas; i++) { | ||
| 121 | unsigned long *addr = __va(scan_areas[i].addr); | ||
| 122 | unsigned long size = scan_areas[i].size; | ||
| 123 | |||
| 124 | for (; size; addr++, size -= sizeof(unsigned long)) { | ||
| 125 | if (!*addr) | ||
| 126 | continue; | ||
| 127 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
| 128 | addr, __pa(addr), *addr); | ||
| 129 | corruption = 1; | ||
| 130 | *addr = 0; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
| 135 | } | ||
| 136 | |||
| 137 | static void check_corruption(struct work_struct *dummy); | ||
| 138 | static DECLARE_DELAYED_WORK(bios_check_work, check_corruption); | ||
| 139 | |||
| 140 | static void check_corruption(struct work_struct *dummy) | ||
| 141 | { | ||
| 142 | check_for_bios_corruption(); | ||
| 143 | schedule_delayed_work(&bios_check_work, | ||
| 144 | round_jiffies_relative(corruption_check_period*HZ)); | ||
| 145 | } | ||
| 146 | |||
| 147 | static int start_periodic_check_for_corruption(void) | ||
| 148 | { | ||
| 149 | if (!memory_corruption_check || corruption_check_period == 0) | ||
| 150 | return 0; | ||
| 151 | |||
| 152 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
| 153 | corruption_check_period); | ||
| 154 | |||
| 155 | /* First time we run the checks right away */ | ||
| 156 | schedule_delayed_work(&bios_check_work, 0); | ||
| 157 | return 0; | ||
| 158 | } | ||
| 159 | |||
| 160 | module_init(start_periodic_check_for_corruption); | ||
| 161 | |||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82ec6075c057..82db7f45e2de 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
| @@ -2,8 +2,14 @@ | |||
| 2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details and quirks |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | # Don't trace early stages of a secondary CPU boot | ||
| 6 | ifdef CONFIG_FUNCTION_TRACER | ||
| 7 | CFLAGS_REMOVE_common.o = -pg | ||
| 8 | endif | ||
| 9 | |||
| 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 10 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
| 6 | obj-y += proc.o capflags.o powerflags.o common.o | 11 | obj-y += proc.o capflags.o powerflags.o common.o |
| 12 | obj-y += vmware.o hypervisor.o | ||
| 7 | 13 | ||
| 8 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 14 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
| 9 | obj-$(CONFIG_X86_64) += bugs_64.o | 15 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index ef8f831af823..2cf23634b6d9 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
| @@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
| 120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) | 120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) |
| 121 | & core_select_mask; | 121 | & core_select_mask; |
| 122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); | 122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); |
| 123 | /* | ||
| 124 | * Reinit the apicid, now that we have extended initial_apicid. | ||
| 125 | */ | ||
| 126 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | ||
| 123 | #else | 127 | #else |
| 124 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; | 128 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; |
| 125 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); | 129 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); |
| 130 | /* | ||
| 131 | * Reinit the apicid, now that we have extended initial_apicid. | ||
| 132 | */ | ||
| 133 | c->apicid = phys_pkg_id(0); | ||
| 126 | #endif | 134 | #endif |
| 127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 135 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
| 128 | 136 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8f1e31db2ad5..7c878f6aa919 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
| 283 | { | 283 | { |
| 284 | early_init_amd_mc(c); | 284 | early_init_amd_mc(c); |
| 285 | 285 | ||
| 286 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | 286 | /* |
| 287 | if (c->x86_power & (1<<8)) | 287 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate |
| 288 | * with P/T states and does not stop in deep C-states | ||
| 289 | */ | ||
| 290 | if (c->x86_power & (1 << 8)) { | ||
| 288 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 291 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
| 292 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
| 293 | } | ||
| 289 | 294 | ||
| 290 | #ifdef CONFIG_X86_64 | 295 | #ifdef CONFIG_X86_64 |
| 291 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | 296 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b9c9ea0217a9..42e0853030cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <asm/proto.h> | 36 | #include <asm/proto.h> |
| 37 | #include <asm/sections.h> | 37 | #include <asm/sections.h> |
| 38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
| 39 | #include <asm/hypervisor.h> | ||
| 39 | 40 | ||
| 40 | #include "cpu.h" | 41 | #include "cpu.h" |
| 41 | 42 | ||
| @@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 703 | detect_ht(c); | 704 | detect_ht(c); |
| 704 | #endif | 705 | #endif |
| 705 | 706 | ||
| 707 | init_hypervisor(c); | ||
| 706 | /* | 708 | /* |
| 707 | * On SMP, boot_cpu_data holds the common feature set between | 709 | * On SMP, boot_cpu_data holds the common feature set between |
| 708 | * all CPUs; so make sure that we indicate which features are | 710 | * all CPUs; so make sure that we indicate which features are |
| @@ -862,7 +864,7 @@ EXPORT_SYMBOL(_cpu_pda); | |||
| 862 | 864 | ||
| 863 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 865 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; |
| 864 | 866 | ||
| 865 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | 867 | static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; |
| 866 | 868 | ||
| 867 | void __cpuinit pda_init(int cpu) | 869 | void __cpuinit pda_init(int cpu) |
| 868 | { | 870 | { |
| @@ -903,8 +905,8 @@ void __cpuinit pda_init(int cpu) | |||
| 903 | } | 905 | } |
| 904 | } | 906 | } |
| 905 | 907 | ||
| 906 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | 908 | static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + |
| 907 | DEBUG_STKSZ] __page_aligned_bss; | 909 | DEBUG_STKSZ] __page_aligned_bss; |
| 908 | 910 | ||
| 909 | extern asmlinkage void ignore_sysret(void); | 911 | extern asmlinkage void ignore_sysret(void); |
| 910 | 912 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467d..88ea02dcb622 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
| 34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
| 35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
| 36 | #include <linux/ftrace.h> | ||
| 36 | 37 | ||
| 37 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
| 38 | #include <acpi/processor.h> | 39 | #include <acpi/processor.h> |
| @@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
| 391 | unsigned int next_perf_state = 0; /* Index into perf table */ | 392 | unsigned int next_perf_state = 0; /* Index into perf table */ |
| 392 | unsigned int i; | 393 | unsigned int i; |
| 393 | int result = 0; | 394 | int result = 0; |
| 395 | struct power_trace it; | ||
| 394 | 396 | ||
| 395 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | 397 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); |
| 396 | 398 | ||
| @@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
| 427 | } | 429 | } |
| 428 | } | 430 | } |
| 429 | 431 | ||
| 432 | trace_power_mark(&it, POWER_PSTATE, next_perf_state); | ||
| 433 | |||
| 430 | switch (data->cpu_feature) { | 434 | switch (data->cpu_feature) { |
| 431 | case SYSTEM_INTEL_MSR_CAPABLE: | 435 | case SYSTEM_INTEL_MSR_CAPABLE: |
| 432 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 436 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c new file mode 100644 index 000000000000..fb5b86af0b01 --- /dev/null +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | /* | ||
| 2 | * Common hypervisor code | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008, VMware, Inc. | ||
| 5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; either version 2 of the License, or | ||
| 10 | * (at your option) any later version. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, but | ||
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 16 | * details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License | ||
| 19 | * along with this program; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include <asm/processor.h> | ||
| 25 | #include <asm/vmware.h> | ||
| 26 | #include <asm/hypervisor.h> | ||
| 27 | |||
| 28 | static inline void __cpuinit | ||
| 29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | ||
| 30 | { | ||
| 31 | if (vmware_platform()) { | ||
| 32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | ||
| 33 | } else { | ||
| 34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | unsigned long get_hypervisor_tsc_freq(void) | ||
| 39 | { | ||
| 40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
| 41 | return vmware_get_tsc_khz(); | ||
| 42 | return 0; | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline void __cpuinit | ||
| 46 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | ||
| 47 | { | ||
| 48 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | ||
| 49 | vmware_set_feature_bits(c); | ||
| 50 | return; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | ||
| 55 | { | ||
| 56 | detect_hypervisor_vendor(c); | ||
| 57 | hypervisor_set_feature_bits(c); | ||
| 58 | } | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cce0b6118d55..8ea6929e974c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
| 12 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
| 13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
| 14 | #include <asm/ptrace.h> | ||
| 15 | #include <asm/ds.h> | 14 | #include <asm/ds.h> |
| 16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
| 17 | 16 | ||
| @@ -41,6 +40,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | 40 | if (c->x86 == 15 && c->x86_cache_alignment == 64) |
| 42 | c->x86_cache_alignment = 128; | 41 | c->x86_cache_alignment = 128; |
| 43 | #endif | 42 | #endif |
| 43 | |||
| 44 | /* | ||
| 45 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | ||
| 46 | * with P/T states and does not stop in deep C-states | ||
| 47 | */ | ||
| 48 | if (c->x86_power & (1 << 8)) { | ||
| 49 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 50 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
| 51 | } | ||
| 52 | |||
| 44 | } | 53 | } |
| 45 | 54 | ||
| 46 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
| @@ -242,6 +251,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 242 | 251 | ||
| 243 | intel_workarounds(c); | 252 | intel_workarounds(c); |
| 244 | 253 | ||
| 254 | /* | ||
| 255 | * Detect the extended topology information if available. This | ||
| 256 | * will reinitialise the initial_apicid which will be used | ||
| 257 | * in init_intel_cacheinfo() | ||
| 258 | */ | ||
| 259 | detect_extended_topology(c); | ||
| 260 | |||
| 245 | l2 = init_intel_cacheinfo(c); | 261 | l2 = init_intel_cacheinfo(c); |
| 246 | if (c->cpuid_level > 9) { | 262 | if (c->cpuid_level > 9) { |
| 247 | unsigned eax = cpuid_eax(10); | 263 | unsigned eax = cpuid_eax(10); |
| @@ -307,13 +323,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 307 | set_cpu_cap(c, X86_FEATURE_P4); | 323 | set_cpu_cap(c, X86_FEATURE_P4); |
| 308 | if (c->x86 == 6) | 324 | if (c->x86 == 6) |
| 309 | set_cpu_cap(c, X86_FEATURE_P3); | 325 | set_cpu_cap(c, X86_FEATURE_P3); |
| 310 | |||
| 311 | if (cpu_has_bts) | ||
| 312 | ptrace_bts_init_intel(c); | ||
| 313 | |||
| 314 | #endif | 326 | #endif |
| 315 | 327 | ||
| 316 | detect_extended_topology(c); | ||
| 317 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | 328 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { |
| 318 | /* | 329 | /* |
| 319 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | 330 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3f46afbb1cf1..68b5d8681cbb 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -644,20 +644,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) | |||
| 644 | return show_shared_cpu_map_func(leaf, 1, buf); | 644 | return show_shared_cpu_map_func(leaf, 1, buf); |
| 645 | } | 645 | } |
| 646 | 646 | ||
| 647 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { | 647 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) |
| 648 | switch(this_leaf->eax.split.type) { | 648 | { |
| 649 | case CACHE_TYPE_DATA: | 649 | switch (this_leaf->eax.split.type) { |
| 650 | case CACHE_TYPE_DATA: | ||
| 650 | return sprintf(buf, "Data\n"); | 651 | return sprintf(buf, "Data\n"); |
| 651 | break; | 652 | case CACHE_TYPE_INST: |
| 652 | case CACHE_TYPE_INST: | ||
| 653 | return sprintf(buf, "Instruction\n"); | 653 | return sprintf(buf, "Instruction\n"); |
| 654 | break; | 654 | case CACHE_TYPE_UNIFIED: |
| 655 | case CACHE_TYPE_UNIFIED: | ||
| 656 | return sprintf(buf, "Unified\n"); | 655 | return sprintf(buf, "Unified\n"); |
| 657 | break; | 656 | default: |
| 658 | default: | ||
| 659 | return sprintf(buf, "Unknown\n"); | 657 | return sprintf(buf, "Unknown\n"); |
| 660 | break; | ||
| 661 | } | 658 | } |
| 662 | } | 659 | } |
| 663 | 660 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 5eb390a4b2e9..748c8f9e7a05 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
| @@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void) | |||
| 237 | } | 237 | } |
| 238 | } | 238 | } |
| 239 | out: | 239 | out: |
| 240 | add_pda(irq_threshold_count, 1); | 240 | inc_irq_stat(irq_threshold_count); |
| 241 | irq_exit(); | 241 | irq_exit(); |
| 242 | } | 242 | } |
| 243 | 243 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c17eaf5dd6dd..4b48f251fd39 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
| @@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) | |||
| 26 | if (therm_throt_process(msr_val & 1)) | 26 | if (therm_throt_process(msr_val & 1)) |
| 27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); | 27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); |
| 28 | 28 | ||
| 29 | add_pda(irq_thermal_count, 1); | 29 | inc_irq_stat(irq_thermal_count); |
| 30 | irq_exit(); | 30 | irq_exit(); |
| 31 | } | 31 | } |
| 32 | 32 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c78c04821ea1..1159e269e596 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
| @@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
| 803 | } | 803 | } |
| 804 | 804 | ||
| 805 | static struct res_range __initdata range[RANGE_NUM]; | 805 | static struct res_range __initdata range[RANGE_NUM]; |
| 806 | static int __initdata nr_range; | ||
| 806 | 807 | ||
| 807 | #ifdef CONFIG_MTRR_SANITIZER | 808 | #ifdef CONFIG_MTRR_SANITIZER |
| 808 | 809 | ||
| @@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result { | |||
| 1206 | #define PSHIFT (PAGE_SHIFT - 10) | 1207 | #define PSHIFT (PAGE_SHIFT - 10) |
| 1207 | 1208 | ||
| 1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1209 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
| 1209 | static struct res_range __initdata range_new[RANGE_NUM]; | ||
| 1210 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | 1210 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; |
| 1211 | 1211 | ||
| 1212 | static int __init mtrr_cleanup(unsigned address_bits) | 1212 | static void __init print_out_mtrr_range_state(void) |
| 1213 | { | 1213 | { |
| 1214 | unsigned long extra_remove_base, extra_remove_size; | ||
| 1215 | unsigned long base, size, def, dummy; | ||
| 1216 | mtrr_type type; | ||
| 1217 | int nr_range, nr_range_new; | ||
| 1218 | u64 chunk_size, gran_size; | ||
| 1219 | unsigned long range_sums, range_sums_new; | ||
| 1220 | int index_good; | ||
| 1221 | int num_reg_good; | ||
| 1222 | int i; | 1214 | int i; |
| 1215 | char start_factor = 'K', size_factor = 'K'; | ||
| 1216 | unsigned long start_base, size_base; | ||
| 1217 | mtrr_type type; | ||
| 1223 | 1218 | ||
| 1224 | /* extra one for all 0 */ | 1219 | for (i = 0; i < num_var_ranges; i++) { |
| 1225 | int num[MTRR_NUM_TYPES + 1]; | ||
| 1226 | 1220 | ||
| 1227 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 1221 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); |
| 1228 | return 0; | 1222 | if (!size_base) |
| 1229 | rdmsr(MTRRdefType_MSR, def, dummy); | 1223 | continue; |
| 1230 | def &= 0xff; | ||
| 1231 | if (def != MTRR_TYPE_UNCACHABLE) | ||
| 1232 | return 0; | ||
| 1233 | 1224 | ||
| 1234 | /* get it and store it aside */ | 1225 | size_base = to_size_factor(size_base, &size_factor), |
| 1235 | memset(range_state, 0, sizeof(range_state)); | 1226 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); |
| 1236 | for (i = 0; i < num_var_ranges; i++) { | 1227 | start_base = to_size_factor(start_base, &start_factor), |
| 1237 | mtrr_if->get(i, &base, &size, &type); | 1228 | type = range_state[i].type; |
| 1238 | range_state[i].base_pfn = base; | 1229 | |
| 1239 | range_state[i].size_pfn = size; | 1230 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", |
| 1240 | range_state[i].type = type; | 1231 | i, start_base, start_factor, |
| 1232 | size_base, size_factor, | ||
| 1233 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
| 1234 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
| 1235 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
| 1236 | ); | ||
| 1241 | } | 1237 | } |
| 1238 | } | ||
| 1239 | |||
| 1240 | static int __init mtrr_need_cleanup(void) | ||
| 1241 | { | ||
| 1242 | int i; | ||
| 1243 | mtrr_type type; | ||
| 1244 | unsigned long size; | ||
| 1245 | /* extra one for all 0 */ | ||
| 1246 | int num[MTRR_NUM_TYPES + 1]; | ||
| 1242 | 1247 | ||
| 1243 | /* check entries number */ | 1248 | /* check entries number */ |
| 1244 | memset(num, 0, sizeof(num)); | 1249 | memset(num, 0, sizeof(num)); |
| @@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1263 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1268 | num_var_ranges - num[MTRR_NUM_TYPES]) |
| 1264 | return 0; | 1269 | return 0; |
| 1265 | 1270 | ||
| 1266 | /* print original var MTRRs at first, for debugging: */ | 1271 | return 1; |
| 1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | 1272 | } |
| 1268 | for (i = 0; i < num_var_ranges; i++) { | ||
| 1269 | char start_factor = 'K', size_factor = 'K'; | ||
| 1270 | unsigned long start_base, size_base; | ||
| 1271 | 1273 | ||
| 1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | 1274 | static unsigned long __initdata range_sums; |
| 1273 | if (!size_base) | 1275 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, |
| 1274 | continue; | 1276 | unsigned long extra_remove_base, |
| 1277 | unsigned long extra_remove_size, | ||
| 1278 | int i) | ||
| 1279 | { | ||
| 1280 | int num_reg; | ||
| 1281 | static struct res_range range_new[RANGE_NUM]; | ||
| 1282 | static int nr_range_new; | ||
| 1283 | unsigned long range_sums_new; | ||
| 1284 | |||
| 1285 | /* convert ranges to var ranges state */ | ||
| 1286 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
| 1287 | chunk_size, gran_size); | ||
| 1288 | |||
| 1289 | /* we got new setting in range_state, check it */ | ||
| 1290 | memset(range_new, 0, sizeof(range_new)); | ||
| 1291 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
| 1292 | extra_remove_base, extra_remove_size); | ||
| 1293 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
| 1294 | |||
| 1295 | result[i].chunk_sizek = chunk_size >> 10; | ||
| 1296 | result[i].gran_sizek = gran_size >> 10; | ||
| 1297 | result[i].num_reg = num_reg; | ||
| 1298 | if (range_sums < range_sums_new) { | ||
| 1299 | result[i].lose_cover_sizek = | ||
| 1300 | (range_sums_new - range_sums) << PSHIFT; | ||
| 1301 | result[i].bad = 1; | ||
| 1302 | } else | ||
| 1303 | result[i].lose_cover_sizek = | ||
| 1304 | (range_sums - range_sums_new) << PSHIFT; | ||
| 1275 | 1305 | ||
| 1276 | size_base = to_size_factor(size_base, &size_factor), | 1306 | /* double check it */ |
| 1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | 1307 | if (!result[i].bad && !result[i].lose_cover_sizek) { |
| 1278 | start_base = to_size_factor(start_base, &start_factor), | 1308 | if (nr_range_new != nr_range || |
| 1279 | type = range_state[i].type; | 1309 | memcmp(range, range_new, sizeof(range))) |
| 1310 | result[i].bad = 1; | ||
| 1311 | } | ||
| 1280 | 1312 | ||
| 1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | 1313 | if (!result[i].bad && (range_sums - range_sums_new < |
| 1282 | i, start_base, start_factor, | 1314 | min_loss_pfn[num_reg])) { |
| 1283 | size_base, size_factor, | 1315 | min_loss_pfn[num_reg] = |
| 1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | 1316 | range_sums - range_sums_new; |
| 1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
| 1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
| 1287 | ); | ||
| 1288 | } | 1317 | } |
| 1318 | } | ||
| 1319 | |||
| 1320 | static void __init mtrr_print_out_one_result(int i) | ||
| 1321 | { | ||
| 1322 | char gran_factor, chunk_factor, lose_factor; | ||
| 1323 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1324 | |||
| 1325 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
| 1326 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
| 1327 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
| 1328 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
| 1329 | result[i].bad ? "*BAD*" : " ", | ||
| 1330 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1331 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1332 | result[i].num_reg, result[i].bad ? "-" : "", | ||
| 1333 | lose_base, lose_factor); | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | static int __init mtrr_search_optimal_index(void) | ||
| 1337 | { | ||
| 1338 | int i; | ||
| 1339 | int num_reg_good; | ||
| 1340 | int index_good; | ||
| 1341 | |||
| 1342 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
| 1343 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
| 1344 | num_reg_good = -1; | ||
| 1345 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
| 1346 | if (!min_loss_pfn[i]) | ||
| 1347 | num_reg_good = i; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | index_good = -1; | ||
| 1351 | if (num_reg_good != -1) { | ||
| 1352 | for (i = 0; i < NUM_RESULT; i++) { | ||
| 1353 | if (!result[i].bad && | ||
| 1354 | result[i].num_reg == num_reg_good && | ||
| 1355 | !result[i].lose_cover_sizek) { | ||
| 1356 | index_good = i; | ||
| 1357 | break; | ||
| 1358 | } | ||
| 1359 | } | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | return index_good; | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | |||
| 1366 | static int __init mtrr_cleanup(unsigned address_bits) | ||
| 1367 | { | ||
| 1368 | unsigned long extra_remove_base, extra_remove_size; | ||
| 1369 | unsigned long base, size, def, dummy; | ||
| 1370 | mtrr_type type; | ||
| 1371 | u64 chunk_size, gran_size; | ||
| 1372 | int index_good; | ||
| 1373 | int i; | ||
| 1374 | |||
| 1375 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
| 1376 | return 0; | ||
| 1377 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
| 1378 | def &= 0xff; | ||
| 1379 | if (def != MTRR_TYPE_UNCACHABLE) | ||
| 1380 | return 0; | ||
| 1381 | |||
| 1382 | /* get it and store it aside */ | ||
| 1383 | memset(range_state, 0, sizeof(range_state)); | ||
| 1384 | for (i = 0; i < num_var_ranges; i++) { | ||
| 1385 | mtrr_if->get(i, &base, &size, &type); | ||
| 1386 | range_state[i].base_pfn = base; | ||
| 1387 | range_state[i].size_pfn = size; | ||
| 1388 | range_state[i].type = type; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | /* check if we need to handle it and can handle it */ | |
| 1392 | if (!mtrr_need_cleanup()) | ||
| 1393 | return 0; | ||
| 1394 | |||
| 1395 | /* print original var MTRRs at first, for debugging: */ | ||
| 1396 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
| 1397 | print_out_mtrr_range_state(); | ||
| 1289 | 1398 | ||
| 1290 | memset(range, 0, sizeof(range)); | 1399 | memset(range, 0, sizeof(range)); |
| 1291 | extra_remove_size = 0; | 1400 | extra_remove_size = 0; |
| @@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1309 | range_sums >> (20 - PAGE_SHIFT)); | 1418 | range_sums >> (20 - PAGE_SHIFT)); |
| 1310 | 1419 | ||
| 1311 | if (mtrr_chunk_size && mtrr_gran_size) { | 1420 | if (mtrr_chunk_size && mtrr_gran_size) { |
| 1312 | int num_reg; | 1421 | i = 0; |
| 1313 | char gran_factor, chunk_factor, lose_factor; | 1422 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, |
| 1314 | unsigned long gran_base, chunk_base, lose_base; | 1423 | extra_remove_base, extra_remove_size, i); |
| 1315 | |||
| 1316 | debug_print++; | ||
| 1317 | /* convert ranges to var ranges state */ | ||
| 1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | ||
| 1319 | mtrr_gran_size); | ||
| 1320 | 1424 | ||
| 1321 | /* we got new setting in range_state, check it */ | 1425 | mtrr_print_out_one_result(i); |
| 1322 | memset(range_new, 0, sizeof(range_new)); | ||
| 1323 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
| 1324 | extra_remove_base, | ||
| 1325 | extra_remove_size); | ||
| 1326 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
| 1327 | 1426 | ||
| 1328 | i = 0; | ||
| 1329 | result[i].chunk_sizek = mtrr_chunk_size >> 10; | ||
| 1330 | result[i].gran_sizek = mtrr_gran_size >> 10; | ||
| 1331 | result[i].num_reg = num_reg; | ||
| 1332 | if (range_sums < range_sums_new) { | ||
| 1333 | result[i].lose_cover_sizek = | ||
| 1334 | (range_sums_new - range_sums) << PSHIFT; | ||
| 1335 | result[i].bad = 1; | ||
| 1336 | } else | ||
| 1337 | result[i].lose_cover_sizek = | ||
| 1338 | (range_sums - range_sums_new) << PSHIFT; | ||
| 1339 | |||
| 1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
| 1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
| 1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
| 1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
| 1344 | result[i].bad?"*BAD*":" ", | ||
| 1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1347 | result[i].num_reg, result[i].bad?"-":"", | ||
| 1348 | lose_base, lose_factor); | ||
| 1349 | if (!result[i].bad) { | 1427 | if (!result[i].bad) { |
| 1350 | set_var_mtrr_all(address_bits); | 1428 | set_var_mtrr_all(address_bits); |
| 1351 | return 1; | 1429 | return 1; |
| 1352 | } | 1430 | } |
| 1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1431 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
| 1354 | "will find optimal one\n"); | 1432 | "will find optimal one\n"); |
| 1355 | debug_print--; | ||
| 1356 | memset(result, 0, sizeof(result[0])); | ||
| 1357 | } | 1433 | } |
| 1358 | 1434 | ||
| 1359 | i = 0; | 1435 | i = 0; |
| 1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1436 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
| 1361 | memset(result, 0, sizeof(result)); | 1437 | memset(result, 0, sizeof(result)); |
| 1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { | 1438 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
| 1363 | char gran_factor; | ||
| 1364 | unsigned long gran_base; | ||
| 1365 | |||
| 1366 | if (debug_print) | ||
| 1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
| 1368 | 1439 | ||
| 1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | 1440 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); |
| 1370 | chunk_size <<= 1) { | 1441 | chunk_size <<= 1) { |
| 1371 | int num_reg; | ||
| 1372 | 1442 | ||
| 1373 | if (debug_print) { | ||
| 1374 | char chunk_factor; | ||
| 1375 | unsigned long chunk_base; | ||
| 1376 | |||
| 1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
| 1378 | printk(KERN_INFO "\n"); | ||
| 1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
| 1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1381 | } | ||
| 1382 | if (i >= NUM_RESULT) | 1443 | if (i >= NUM_RESULT) |
| 1383 | continue; | 1444 | continue; |
| 1384 | 1445 | ||
| 1385 | /* convert ranges to var ranges state */ | 1446 | mtrr_calc_range_state(chunk_size, gran_size, |
| 1386 | num_reg = x86_setup_var_mtrrs(range, nr_range, | 1447 | extra_remove_base, extra_remove_size, i); |
| 1387 | chunk_size, gran_size); | 1448 | if (debug_print) { |
| 1388 | 1449 | mtrr_print_out_one_result(i); | |
| 1389 | /* we got new setting in range_state, check it */ | 1450 | printk(KERN_INFO "\n"); |
| 1390 | memset(range_new, 0, sizeof(range_new)); | ||
| 1391 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
| 1392 | extra_remove_base, extra_remove_size); | ||
| 1393 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
| 1394 | |||
| 1395 | result[i].chunk_sizek = chunk_size >> 10; | ||
| 1396 | result[i].gran_sizek = gran_size >> 10; | ||
| 1397 | result[i].num_reg = num_reg; | ||
| 1398 | if (range_sums < range_sums_new) { | ||
| 1399 | result[i].lose_cover_sizek = | ||
| 1400 | (range_sums_new - range_sums) << PSHIFT; | ||
| 1401 | result[i].bad = 1; | ||
| 1402 | } else | ||
| 1403 | result[i].lose_cover_sizek = | ||
| 1404 | (range_sums - range_sums_new) << PSHIFT; | ||
| 1405 | |||
| 1406 | /* double check it */ | ||
| 1407 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
| 1408 | if (nr_range_new != nr_range || | ||
| 1409 | memcmp(range, range_new, sizeof(range))) | ||
| 1410 | result[i].bad = 1; | ||
| 1411 | } | 1451 | } |
| 1412 | 1452 | ||
| 1413 | if (!result[i].bad && (range_sums - range_sums_new < | ||
| 1414 | min_loss_pfn[num_reg])) { | ||
| 1415 | min_loss_pfn[num_reg] = | ||
| 1416 | range_sums - range_sums_new; | ||
| 1417 | } | ||
| 1418 | i++; | 1453 | i++; |
| 1419 | } | 1454 | } |
| 1420 | } | 1455 | } |
| 1421 | 1456 | ||
| 1422 | /* print out all */ | ||
| 1423 | for (i = 0; i < NUM_RESULT; i++) { | ||
| 1424 | char gran_factor, chunk_factor, lose_factor; | ||
| 1425 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1426 | |||
| 1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
| 1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
| 1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
| 1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
| 1431 | result[i].bad?"*BAD*":" ", | ||
| 1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1434 | result[i].num_reg, result[i].bad?"-":"", | ||
| 1435 | lose_base, lose_factor); | ||
| 1436 | } | ||
| 1437 | |||
| 1438 | /* try to find the optimal index */ | 1457 | /* try to find the optimal index */ |
| 1439 | if (nr_mtrr_spare_reg >= num_var_ranges) | 1458 | index_good = mtrr_search_optimal_index(); |
| 1440 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
| 1441 | num_reg_good = -1; | ||
| 1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
| 1443 | if (!min_loss_pfn[i]) | ||
| 1444 | num_reg_good = i; | ||
| 1445 | } | ||
| 1446 | |||
| 1447 | index_good = -1; | ||
| 1448 | if (num_reg_good != -1) { | ||
| 1449 | for (i = 0; i < NUM_RESULT; i++) { | ||
| 1450 | if (!result[i].bad && | ||
| 1451 | result[i].num_reg == num_reg_good && | ||
| 1452 | !result[i].lose_cover_sizek) { | ||
| 1453 | index_good = i; | ||
| 1454 | break; | ||
| 1455 | } | ||
| 1456 | } | ||
| 1457 | } | ||
| 1458 | 1459 | ||
| 1459 | if (index_good != -1) { | 1460 | if (index_good != -1) { |
| 1460 | char gran_factor, chunk_factor, lose_factor; | ||
| 1461 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1462 | |||
| 1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1461 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
| 1464 | i = index_good; | 1462 | i = index_good; |
| 1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 1463 | mtrr_print_out_one_result(i); |
| 1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 1464 | |
| 1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
| 1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", | ||
| 1469 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", | ||
| 1471 | result[i].num_reg, lose_base, lose_factor); | ||
| 1472 | /* convert ranges to var ranges state */ | 1465 | /* convert ranges to var ranges state */ |
| 1473 | chunk_size = result[i].chunk_sizek; | 1466 | chunk_size = result[i].chunk_sizek; |
| 1474 | chunk_size <<= 10; | 1467 | chunk_size <<= 10; |
| 1475 | gran_size = result[i].gran_sizek; | 1468 | gran_size = result[i].gran_sizek; |
| 1476 | gran_size <<= 10; | 1469 | gran_size <<= 10; |
| 1477 | debug_print++; | ||
| 1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1470 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
| 1479 | debug_print--; | ||
| 1480 | set_var_mtrr_all(address_bits); | 1471 | set_var_mtrr_all(address_bits); |
| 1472 | printk(KERN_DEBUG "New variable MTRRs\n"); | ||
| 1473 | print_out_mtrr_range_state(); | ||
| 1481 | return 1; | 1474 | return 1; |
| 1475 | } else { | ||
| 1476 | /* print out all */ | ||
| 1477 | for (i = 0; i < NUM_RESULT; i++) | ||
| 1478 | mtrr_print_out_one_result(i); | ||
| 1482 | } | 1479 | } |
| 1483 | 1480 | ||
| 1484 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); | 1481 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); |
| @@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
| 1562 | { | 1559 | { |
| 1563 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 1560 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
| 1564 | mtrr_type type; | 1561 | mtrr_type type; |
| 1565 | int nr_range; | ||
| 1566 | u64 total_trim_size; | 1562 | u64 total_trim_size; |
| 1567 | 1563 | ||
| 1568 | /* extra one for all 0 */ | 1564 | /* extra one for all 0 */ |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c new file mode 100644 index 000000000000..284c399e3234 --- /dev/null +++ b/arch/x86/kernel/cpu/vmware.c | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | /* | ||
| 2 | * VMware Detection code. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008, VMware, Inc. | ||
| 5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; either version 2 of the License, or | ||
| 10 | * (at your option) any later version. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, but | ||
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 16 | * details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License | ||
| 19 | * along with this program; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include <linux/dmi.h> | ||
| 25 | #include <asm/div64.h> | ||
| 26 | #include <asm/vmware.h> | ||
| 27 | |||
| 28 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | ||
| 29 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | ||
| 30 | #define VMWARE_HYPERVISOR_PORT 0x5658 | ||
| 31 | |||
| 32 | #define VMWARE_PORT_CMD_GETVERSION 10 | ||
| 33 | #define VMWARE_PORT_CMD_GETHZ 45 | ||
| 34 | |||
| 35 | #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ | ||
| 36 | __asm__("inl (%%dx)" : \ | ||
| 37 | "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ | ||
| 38 | "0"(VMWARE_HYPERVISOR_MAGIC), \ | ||
| 39 | "1"(VMWARE_PORT_CMD_##cmd), \ | ||
| 40 | "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ | ||
| 41 | "memory"); | ||
| 42 | |||
| 43 | static inline int __vmware_platform(void) | ||
| 44 | { | ||
| 45 | uint32_t eax, ebx, ecx, edx; | ||
| 46 | VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx); | ||
| 47 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; | ||
| 48 | } | ||
| 49 | |||
| 50 | static unsigned long __vmware_get_tsc_khz(void) | ||
| 51 | { | ||
| 52 | uint64_t tsc_hz; | ||
| 53 | uint32_t eax, ebx, ecx, edx; | ||
| 54 | |||
| 55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | ||
| 56 | |||
| 57 | if (ebx == UINT_MAX) | ||
| 58 | return 0; | ||
| 59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | ||
| 60 | do_div(tsc_hz, 1000); | ||
| 61 | BUG_ON(tsc_hz >> 32); | ||
| 62 | return tsc_hz; | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 66 | * While checking the dmi string information, just checking the product | |
| 67 | * serial key should be enough, as this will always have a VMware | ||
| 68 | * specific string when running under VMware hypervisor. | ||
| 69 | */ | ||
| 70 | int vmware_platform(void) | ||
| 71 | { | ||
| 72 | if (cpu_has_hypervisor) { | ||
| 73 | unsigned int eax, ebx, ecx, edx; | ||
| 74 | char hyper_vendor_id[13]; | ||
| 75 | |||
| 76 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | ||
| 77 | memcpy(hyper_vendor_id + 0, &ebx, 4); | ||
| 78 | memcpy(hyper_vendor_id + 4, &ecx, 4); | ||
| 79 | memcpy(hyper_vendor_id + 8, &edx, 4); | ||
| 80 | hyper_vendor_id[12] = '\0'; | ||
| 81 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
| 82 | return 1; | ||
| 83 | } else if (dmi_available && dmi_name_in_serial("VMware") && | ||
| 84 | __vmware_platform()) | ||
| 85 | return 1; | ||
| 86 | |||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | unsigned long vmware_get_tsc_khz(void) | ||
| 91 | { | ||
| 92 | BUG_ON(!vmware_platform()); | ||
| 93 | return __vmware_get_tsc_khz(); | ||
| 94 | } | ||
| 95 | |||
| 96 | /* | ||
| 97 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | ||
| 98 | * Still, due to timing difference when running on virtual cpus, the TSC can | ||
| 99 | * be marked as unstable in some cases. For example, the TSC sync check at | ||
| 100 | * bootup can fail due to a marginal offset between vcpus' TSCs (though the | ||
| 101 | * TSCs do not drift from each other). Also, the ACPI PM timer clocksource | ||
| 102 | * is not suitable as a watchdog when running on a hypervisor because the | ||
| 103 | * kernel may miss a wrap of the counter if the vcpu is descheduled for a | ||
| 104 | * long time. To skip these checks at runtime we set these capability bits, | ||
| 105 | * so that the kernel could just trust the hypervisor with providing a | ||
| 106 | * reliable virtual TSC that is suitable for timekeeping. | ||
| 107 | */ | ||
| 108 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | ||
| 109 | { | ||
| 110 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 111 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | ||
| 112 | } | ||
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 268553817909..d84a852e4cd7 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
| @@ -29,34 +29,17 @@ | |||
| 29 | 29 | ||
| 30 | #include <mach_ipi.h> | 30 | #include <mach_ipi.h> |
| 31 | 31 | ||
| 32 | /* This keeps a track of which one is crashing cpu. */ | ||
| 33 | static int crashing_cpu; | ||
| 34 | 32 | ||
| 35 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
| 36 | static atomic_t waiting_for_crash_ipi; | ||
| 37 | 34 | ||
| 38 | static int crash_nmi_callback(struct notifier_block *self, | 35 | static void kdump_nmi_callback(int cpu, struct die_args *args) |
| 39 | unsigned long val, void *data) | ||
| 40 | { | 36 | { |
| 41 | struct pt_regs *regs; | 37 | struct pt_regs *regs; |
| 42 | #ifdef CONFIG_X86_32 | 38 | #ifdef CONFIG_X86_32 |
| 43 | struct pt_regs fixed_regs; | 39 | struct pt_regs fixed_regs; |
| 44 | #endif | 40 | #endif |
| 45 | int cpu; | ||
| 46 | 41 | ||
| 47 | if (val != DIE_NMI_IPI) | 42 | regs = args->regs; |
| 48 | return NOTIFY_OK; | ||
| 49 | |||
| 50 | regs = ((struct die_args *)data)->regs; | ||
| 51 | cpu = raw_smp_processor_id(); | ||
| 52 | |||
| 53 | /* Don't do anything if this handler is invoked on crashing cpu. | ||
| 54 | * Otherwise, system will completely hang. Crashing cpu can get | ||
| 55 | * an NMI if system was initially booted with nmi_watchdog parameter. | ||
| 56 | */ | ||
| 57 | if (cpu == crashing_cpu) | ||
| 58 | return NOTIFY_STOP; | ||
| 59 | local_irq_disable(); | ||
| 60 | 43 | ||
| 61 | #ifdef CONFIG_X86_32 | 44 | #ifdef CONFIG_X86_32 |
| 62 | if (!user_mode_vm(regs)) { | 45 | if (!user_mode_vm(regs)) { |
| @@ -65,54 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
| 65 | } | 48 | } |
| 66 | #endif | 49 | #endif |
| 67 | crash_save_cpu(regs, cpu); | 50 | crash_save_cpu(regs, cpu); |
| 68 | disable_local_APIC(); | ||
| 69 | atomic_dec(&waiting_for_crash_ipi); | ||
| 70 | /* Assume hlt works */ | ||
| 71 | halt(); | ||
| 72 | for (;;) | ||
| 73 | cpu_relax(); | ||
| 74 | |||
| 75 | return 1; | ||
| 76 | } | ||
| 77 | 51 | ||
| 78 | static void smp_send_nmi_allbutself(void) | 52 | disable_local_APIC(); |
| 79 | { | ||
| 80 | cpumask_t mask = cpu_online_map; | ||
| 81 | cpu_clear(safe_smp_processor_id(), mask); | ||
| 82 | if (!cpus_empty(mask)) | ||
| 83 | send_IPI_mask(mask, NMI_VECTOR); | ||
| 84 | } | 53 | } |
| 85 | 54 | ||
| 86 | static struct notifier_block crash_nmi_nb = { | 55 | static void kdump_nmi_shootdown_cpus(void) |
| 87 | .notifier_call = crash_nmi_callback, | ||
| 88 | }; | ||
| 89 | |||
| 90 | static void nmi_shootdown_cpus(void) | ||
| 91 | { | 56 | { |
| 92 | unsigned long msecs; | 57 | nmi_shootdown_cpus(kdump_nmi_callback); |
| 93 | |||
| 94 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
| 95 | /* Would it be better to replace the trap vector here? */ | ||
| 96 | if (register_die_notifier(&crash_nmi_nb)) | ||
| 97 | return; /* return what? */ | ||
| 98 | /* Ensure the new callback function is set before sending | ||
| 99 | * out the NMI | ||
| 100 | */ | ||
| 101 | wmb(); | ||
| 102 | 58 | ||
| 103 | smp_send_nmi_allbutself(); | ||
| 104 | |||
| 105 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
| 106 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
| 107 | mdelay(1); | ||
| 108 | msecs--; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* Leave the nmi callback set */ | ||
| 112 | disable_local_APIC(); | 59 | disable_local_APIC(); |
| 113 | } | 60 | } |
| 61 | |||
| 114 | #else | 62 | #else |
| 115 | static void nmi_shootdown_cpus(void) | 63 | static void kdump_nmi_shootdown_cpus(void) |
| 116 | { | 64 | { |
| 117 | /* There are no cpus to shootdown */ | 65 | /* There are no cpus to shootdown */ |
| 118 | } | 66 | } |
| @@ -131,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
| 131 | /* The kernel is broken so disable interrupts */ | 79 | /* The kernel is broken so disable interrupts */ |
| 132 | local_irq_disable(); | 80 | local_irq_disable(); |
| 133 | 81 | ||
| 134 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | 82 | kdump_nmi_shootdown_cpus(); |
| 135 | crashing_cpu = safe_smp_processor_id(); | ||
| 136 | nmi_shootdown_cpus(); | ||
| 137 | lapic_shutdown(); | 83 | lapic_shutdown(); |
| 138 | #if defined(CONFIG_X86_IO_APIC) | 84 | #if defined(CONFIG_X86_IO_APIC) |
| 139 | disable_IO_APIC(); | 85 | disable_IO_APIC(); |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index a2d1176c38ee..da91701a2348 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
| @@ -6,14 +6,13 @@ | |||
| 6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
| 7 | * | 7 | * |
| 8 | * It manages: | 8 | * It manages: |
| 9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
| 10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
| 11 | * - buffer overflow handling | ||
| 12 | * - buffer access | 11 | * - buffer access |
| 13 | * | 12 | * |
| 14 | * It assumes: | 13 | * It does not do: |
| 15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
| 16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
| 17 | * | 16 | * |
| 18 | * | 17 | * |
| 19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2008 Intel Corporation. |
| @@ -28,22 +27,69 @@ | |||
| 28 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
| 29 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
| 30 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
| 30 | #include <linux/kernel.h> | ||
| 31 | 31 | ||
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * The configuration for a particular DS hardware implementation. | 34 | * The configuration for a particular DS hardware implementation. |
| 35 | */ | 35 | */ |
| 36 | struct ds_configuration { | 36 | struct ds_configuration { |
| 37 | /* the size of the DS structure in bytes */ | 37 | /* the name of the configuration */ |
| 38 | unsigned char sizeof_ds; | 38 | const char *name; |
| 39 | /* the size of one pointer-typed field in the DS structure in bytes; | 39 | /* the size of one pointer-typed field in the DS structure and |
| 40 | this covers the first 8 fields related to buffer management. */ | 40 | in the BTS and PEBS buffers in bytes; |
| 41 | this covers the first 8 DS fields related to buffer management. */ | ||
| 41 | unsigned char sizeof_field; | 42 | unsigned char sizeof_field; |
| 42 | /* the size of a BTS/PEBS record in bytes */ | 43 | /* the size of a BTS/PEBS record in bytes */ |
| 43 | unsigned char sizeof_rec[2]; | 44 | unsigned char sizeof_rec[2]; |
| 45 | /* a series of bit-masks to control various features indexed | ||
| 46 | * by enum ds_feature */ | ||
| 47 | unsigned long ctl[dsf_ctl_max]; | ||
| 44 | }; | 48 | }; |
| 45 | static struct ds_configuration ds_cfg; | 49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); |
| 46 | 50 | ||
| 51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | ||
| 52 | |||
| 53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | ||
| 54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | ||
| 55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
| 56 | |||
| 57 | #define BTS_CONTROL \ | ||
| 58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | ||
| 59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
| 60 | |||
| 61 | |||
| 62 | /* | ||
| 63 | * A BTS or PEBS tracer. | ||
| 64 | * | ||
| 65 | * This holds the configuration of the tracer and serves as a handle | ||
| 66 | * to identify tracers. | ||
| 67 | */ | ||
| 68 | struct ds_tracer { | ||
| 69 | /* the DS context (partially) owned by this tracer */ | ||
| 70 | struct ds_context *context; | ||
| 71 | /* the buffer provided on ds_request() and its size in bytes */ | ||
| 72 | void *buffer; | ||
| 73 | size_t size; | ||
| 74 | }; | ||
| 75 | |||
| 76 | struct bts_tracer { | ||
| 77 | /* the common DS part */ | ||
| 78 | struct ds_tracer ds; | ||
| 79 | /* the trace including the DS configuration */ | ||
| 80 | struct bts_trace trace; | ||
| 81 | /* buffer overflow notification function */ | ||
| 82 | bts_ovfl_callback_t ovfl; | ||
| 83 | }; | ||
| 84 | |||
| 85 | struct pebs_tracer { | ||
| 86 | /* the common DS part */ | ||
| 87 | struct ds_tracer ds; | ||
| 88 | /* the trace including the DS configuration */ | ||
| 89 | struct pebs_trace trace; | ||
| 90 | /* buffer overflow notification function */ | ||
| 91 | pebs_ovfl_callback_t ovfl; | ||
| 92 | }; | ||
| 47 | 93 | ||
| 48 | /* | 94 | /* |
| 49 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | 95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 |
| @@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
| 109 | 155 | ||
| 110 | 156 | ||
| 111 | /* | 157 | /* |
| 112 | * Locking is done only for allocating BTS or PEBS resources and for | 158 | * Locking is done only for allocating BTS or PEBS resources. |
| 113 | * guarding context and buffer memory allocation. | ||
| 114 | * | ||
| 115 | * Most functions require the current task to own the ds context part | ||
| 116 | * they are going to access. All the locking is done when validating | ||
| 117 | * access to the context. | ||
| 118 | */ | 159 | */ |
| 119 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); | 160 | static DEFINE_SPINLOCK(ds_lock); |
| 120 | |||
| 121 | /* | ||
| 122 | * Validate that the current task is allowed to access the BTS/PEBS | ||
| 123 | * buffer of the parameter task. | ||
| 124 | * | ||
| 125 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
| 126 | */ | ||
| 127 | static inline int ds_validate_access(struct ds_context *context, | ||
| 128 | enum ds_qualifier qual) | ||
| 129 | { | ||
| 130 | if (!context) | ||
| 131 | return -EPERM; | ||
| 132 | |||
| 133 | if (context->owner[qual] == current) | ||
| 134 | return 0; | ||
| 135 | |||
| 136 | return -EPERM; | ||
| 137 | } | ||
| 138 | 161 | ||
| 139 | 162 | ||
| 140 | /* | 163 | /* |
| @@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context, | |||
| 150 | * >0 number of per-thread tracers | 173 | * >0 number of per-thread tracers |
| 151 | * <0 number of per-cpu tracers | 174 | * <0 number of per-cpu tracers |
| 152 | * | 175 | * |
| 153 | * The below functions to get and put tracers and to check the | ||
| 154 | * allocation type require the ds_lock to be held by the caller. | ||
| 155 | * | ||
| 156 | * Tracers essentially gives the number of ds contexts for a certain | 176 | * Tracers essentially gives the number of ds contexts for a certain |
| 157 | * type of allocation. | 177 | * type of allocation. |
| 158 | */ | 178 | */ |
| 159 | static long tracers; | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
| 160 | 180 | ||
| 161 | static inline void get_tracer(struct task_struct *task) | 181 | static inline void get_tracer(struct task_struct *task) |
| 162 | { | 182 | { |
| 163 | tracers += (task ? 1 : -1); | 183 | if (task) |
| 184 | atomic_inc(&tracers); | ||
| 185 | else | ||
| 186 | atomic_dec(&tracers); | ||
| 164 | } | 187 | } |
| 165 | 188 | ||
| 166 | static inline void put_tracer(struct task_struct *task) | 189 | static inline void put_tracer(struct task_struct *task) |
| 167 | { | 190 | { |
| 168 | tracers -= (task ? 1 : -1); | 191 | if (task) |
| 192 | atomic_dec(&tracers); | ||
| 193 | else | ||
| 194 | atomic_inc(&tracers); | ||
| 169 | } | 195 | } |
| 170 | 196 | ||
| 171 | static inline int check_tracer(struct task_struct *task) | 197 | static inline int check_tracer(struct task_struct *task) |
| 172 | { | 198 | { |
| 173 | return (task ? (tracers >= 0) : (tracers <= 0)); | 199 | return task ? |
| 200 | (atomic_read(&tracers) >= 0) : | ||
| 201 | (atomic_read(&tracers) <= 0); | ||
| 174 | } | 202 | } |
| 175 | 203 | ||
| 176 | 204 | ||
| @@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task) | |||
| 183 | * | 211 | * |
| 184 | * Contexts are use-counted. They are allocated on first access and | 212 | * Contexts are use-counted. They are allocated on first access and |
| 185 | * deallocated when the last user puts the context. | 213 | * deallocated when the last user puts the context. |
| 186 | * | ||
| 187 | * We distinguish between an allocating and a non-allocating get of a | ||
| 188 | * context: | ||
| 189 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
| 190 | * requires the caller to hold the global ds_lock. | ||
| 191 | * - the non-allocating get is used for all other cases. A | ||
| 192 | * non-existing context indicates an error. It acquires and releases | ||
| 193 | * the ds_lock itself for obtaining the context. | ||
| 194 | * | ||
| 195 | * A context and its DS configuration are allocated and deallocated | ||
| 196 | * together. A context always has a DS configuration of the | ||
| 197 | * appropriate size. | ||
| 198 | */ | ||
| 199 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
| 200 | |||
| 201 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Returns the pointer to the parameter task's context or to the | ||
| 205 | * system-wide context, if task is NULL. | ||
| 206 | * | ||
| 207 | * Increases the use count of the returned context, if not NULL. | ||
| 208 | */ | 214 | */ |
| 209 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 215 | struct ds_context { |
| 210 | { | 216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ |
| 211 | struct ds_context *context; | 217 | unsigned char ds[MAX_SIZEOF_DS]; |
| 212 | unsigned long irq; | 218 | /* the owner of the BTS and PEBS configuration, respectively */ |
| 219 | struct bts_tracer *bts_master; | ||
| 220 | struct pebs_tracer *pebs_master; | ||
| 221 | /* use count */ | ||
| 222 | unsigned long count; | ||
| 223 | /* a pointer to the context location inside the thread_struct | ||
| 224 | * or the per_cpu context array */ | ||
| 225 | struct ds_context **this; | ||
| 226 | /* a pointer to the task owning this context, or NULL, if the | ||
| 227 | * context is owned by a cpu */ | ||
| 228 | struct task_struct *task; | ||
| 229 | }; | ||
| 213 | 230 | ||
| 214 | spin_lock_irqsave(&ds_lock, irq); | 231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); |
| 215 | 232 | ||
| 216 | context = (task ? task->thread.ds_ctx : this_system_context); | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
| 217 | if (context) | ||
| 218 | context->count++; | ||
| 219 | 234 | ||
| 220 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 221 | |||
| 222 | return context; | ||
| 223 | } | ||
| 224 | 235 | ||
| 225 | /* | 236 | static inline struct ds_context *ds_get_context(struct task_struct *task) |
| 226 | * Same as ds_get_context, but allocates the context and it's DS | ||
| 227 | * structure, if necessary; returns NULL; if out of memory. | ||
| 228 | */ | ||
| 229 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
| 230 | { | 237 | { |
| 231 | struct ds_context **p_context = | 238 | struct ds_context **p_context = |
| 232 | (task ? &task->thread.ds_ctx : &this_system_context); | 239 | (task ? &task->thread.ds_ctx : &system_context); |
| 233 | struct ds_context *context = *p_context; | 240 | struct ds_context *context = NULL; |
| 241 | struct ds_context *new_context = NULL; | ||
| 234 | unsigned long irq; | 242 | unsigned long irq; |
| 235 | 243 | ||
| 236 | if (!context) { | 244 | /* Chances are small that we already have a context. */ |
| 237 | context = kzalloc(sizeof(*context), GFP_KERNEL); | 245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
| 238 | if (!context) | 246 | if (!new_context) |
| 239 | return NULL; | 247 | return NULL; |
| 240 | |||
| 241 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
| 242 | if (!context->ds) { | ||
| 243 | kfree(context); | ||
| 244 | return NULL; | ||
| 245 | } | ||
| 246 | 248 | ||
| 247 | spin_lock_irqsave(&ds_lock, irq); | 249 | spin_lock_irqsave(&ds_lock, irq); |
| 248 | 250 | ||
| 249 | if (*p_context) { | 251 | context = *p_context; |
| 250 | kfree(context->ds); | 252 | if (!context) { |
| 251 | kfree(context); | 253 | context = new_context; |
| 252 | 254 | ||
| 253 | context = *p_context; | 255 | context->this = p_context; |
| 254 | } else { | 256 | context->task = task; |
| 255 | *p_context = context; | 257 | context->count = 0; |
| 256 | 258 | ||
| 257 | context->this = p_context; | 259 | if (task) |
| 258 | context->task = task; | 260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); |
| 259 | 261 | ||
| 260 | if (task) | 262 | if (!task || (task == current)) |
| 261 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | 263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); |
| 262 | 264 | ||
| 263 | if (!task || (task == current)) | 265 | *p_context = context; |
| 264 | wrmsrl(MSR_IA32_DS_AREA, | ||
| 265 | (unsigned long)context->ds); | ||
| 266 | } | ||
| 267 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 268 | } | 266 | } |
| 269 | 267 | ||
| 270 | context->count++; | 268 | context->count++; |
| 271 | 269 | ||
| 270 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 271 | |||
| 272 | if (context != new_context) | ||
| 273 | kfree(new_context); | ||
| 274 | |||
| 272 | return context; | 275 | return context; |
| 273 | } | 276 | } |
| 274 | 277 | ||
| 275 | /* | ||
| 276 | * Decreases the use count of the parameter context, if not NULL. | ||
| 277 | * Deallocates the context, if the use count reaches zero. | ||
| 278 | */ | ||
| 279 | static inline void ds_put_context(struct ds_context *context) | 278 | static inline void ds_put_context(struct ds_context *context) |
| 280 | { | 279 | { |
| 281 | unsigned long irq; | 280 | unsigned long irq; |
| @@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context) | |||
| 285 | 284 | ||
| 286 | spin_lock_irqsave(&ds_lock, irq); | 285 | spin_lock_irqsave(&ds_lock, irq); |
| 287 | 286 | ||
| 288 | if (--context->count) | 287 | if (--context->count) { |
| 289 | goto out; | 288 | spin_unlock_irqrestore(&ds_lock, irq); |
| 289 | return; | ||
| 290 | } | ||
| 290 | 291 | ||
| 291 | *(context->this) = NULL; | 292 | *(context->this) = NULL; |
| 292 | 293 | ||
| @@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context) | |||
| 296 | if (!context->task || (context->task == current)) | 297 | if (!context->task || (context->task == current)) |
| 297 | wrmsrl(MSR_IA32_DS_AREA, 0); | 298 | wrmsrl(MSR_IA32_DS_AREA, 0); |
| 298 | 299 | ||
| 299 | put_tracer(context->task); | 300 | spin_unlock_irqrestore(&ds_lock, irq); |
| 300 | 301 | ||
| 301 | /* free any leftover buffers from tracers that did not | ||
| 302 | * deallocate them properly. */ | ||
| 303 | kfree(context->buffer[ds_bts]); | ||
| 304 | kfree(context->buffer[ds_pebs]); | ||
| 305 | kfree(context->ds); | ||
| 306 | kfree(context); | 302 | kfree(context); |
| 307 | out: | ||
| 308 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 309 | } | 303 | } |
| 310 | 304 | ||
| 311 | 305 | ||
| 312 | /* | 306 | /* |
| 313 | * Handle a buffer overflow | 307 | * Call the tracer's callback on a buffer overflow. |
| 314 | * | 308 | * |
| 315 | * task: the task whose buffers are overflowing; | ||
| 316 | * NULL for a buffer overflow on the current cpu | ||
| 317 | * context: the ds context | 309 | * context: the ds context |
| 318 | * qual: the buffer type | 310 | * qual: the buffer type |
| 319 | */ | 311 | */ |
| 320 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | 312 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
| 321 | enum ds_qualifier qual) | ||
| 322 | { | 313 | { |
| 323 | if (!context) | 314 | switch (qual) { |
| 324 | return; | 315 | case ds_bts: |
| 325 | 316 | if (context->bts_master && | |
| 326 | if (context->callback[qual]) | 317 | context->bts_master->ovfl) |
| 327 | (*context->callback[qual])(task); | 318 | context->bts_master->ovfl(context->bts_master); |
| 328 | 319 | break; | |
| 329 | /* todo: do some more overflow handling */ | 320 | case ds_pebs: |
| 321 | if (context->pebs_master && | ||
| 322 | context->pebs_master->ovfl) | ||
| 323 | context->pebs_master->ovfl(context->pebs_master); | ||
| 324 | break; | ||
| 325 | } | ||
| 330 | } | 326 | } |
| 331 | 327 | ||
| 332 | 328 | ||
| 333 | /* | 329 | /* |
| 334 | * Allocate a non-pageable buffer of the parameter size. | 330 | * Write raw data into the BTS or PEBS buffer. |
| 335 | * Checks the memory and the locked memory rlimit. | ||
| 336 | * | 331 | * |
| 337 | * Returns the buffer, if successful; | 332 | * The remainder of any partially written record is zeroed out. |
| 338 | * NULL, if out of memory or rlimit exceeded. | ||
| 339 | * | 333 | * |
| 340 | * size: the requested buffer size in bytes | 334 | * context: the DS context |
| 341 | * pages (out): if not NULL, contains the number of pages reserved | 335 | * qual: the buffer type |
| 336 | * record: the data to write | ||
| 337 | * size: the size of the data | ||
| 342 | */ | 338 | */ |
| 343 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | 339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
| 340 | const void *record, size_t size) | ||
| 344 | { | 341 | { |
| 345 | unsigned long rlim, vm, pgsz; | 342 | int bytes_written = 0; |
| 346 | void *buffer; | ||
| 347 | 343 | ||
| 348 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 344 | if (!record) |
| 345 | return -EINVAL; | ||
| 349 | 346 | ||
| 350 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | 347 | while (size) { |
| 351 | vm = current->mm->total_vm + pgsz; | 348 | unsigned long base, index, end, write_end, int_th; |
| 352 | if (rlim < vm) | 349 | unsigned long write_size, adj_write_size; |
| 353 | return NULL; | ||
| 354 | 350 | ||
| 355 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 351 | /* |
| 356 | vm = current->mm->locked_vm + pgsz; | 352 | * write as much as possible without producing an |
| 357 | if (rlim < vm) | 353 | * overflow interrupt. |
| 358 | return NULL; | 354 | * |
| 355 | * interrupt_threshold must either be | ||
| 356 | * - bigger than absolute_maximum or | ||
| 357 | * - point to a record between buffer_base and absolute_maximum | ||
| 358 | * | ||
| 359 | * index points to a valid record. | ||
| 360 | */ | ||
| 361 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 362 | index = ds_get(context->ds, qual, ds_index); | ||
| 363 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 364 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 359 | 365 | ||
| 360 | buffer = kzalloc(size, GFP_KERNEL); | 366 | write_end = min(end, int_th); |
| 361 | if (!buffer) | ||
| 362 | return NULL; | ||
| 363 | 367 | ||
| 364 | current->mm->total_vm += pgsz; | 368 | /* if we are already beyond the interrupt threshold, |
| 365 | current->mm->locked_vm += pgsz; | 369 | * we fill the entire buffer */ |
| 370 | if (write_end <= index) | ||
| 371 | write_end = end; | ||
| 366 | 372 | ||
| 367 | if (pages) | 373 | if (write_end <= index) |
| 368 | *pages = pgsz; | 374 | break; |
| 375 | |||
| 376 | write_size = min((unsigned long) size, write_end - index); | ||
| 377 | memcpy((void *)index, record, write_size); | ||
| 369 | 378 | ||
| 370 | return buffer; | 379 | record = (const char *)record + write_size; |
| 380 | size -= write_size; | ||
| 381 | bytes_written += write_size; | ||
| 382 | |||
| 383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
| 384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
| 385 | |||
| 386 | /* zero out trailing bytes */ | ||
| 387 | memset((char *)index + write_size, 0, | ||
| 388 | adj_write_size - write_size); | ||
| 389 | index += adj_write_size; | ||
| 390 | |||
| 391 | if (index >= end) | ||
| 392 | index = base; | ||
| 393 | ds_set(context->ds, qual, ds_index, index); | ||
| 394 | |||
| 395 | if (index >= int_th) | ||
| 396 | ds_overflow(context, qual); | ||
| 397 | } | ||
| 398 | |||
| 399 | return bytes_written; | ||
| 371 | } | 400 | } |
| 372 | 401 | ||
| 373 | static int ds_request(struct task_struct *task, void *base, size_t size, | 402 | |
| 374 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | 403 | /* |
| 404 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 405 | * architectures vary in the size of those fields. | ||
| 406 | * - source linear address | ||
| 407 | * - destination linear address | ||
| 408 | * - flags | ||
| 409 | * | ||
| 410 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 411 | * architectures use 32bit pointers in 32bit mode. | ||
| 412 | * | ||
| 413 | * We compute the base address for the first 8 fields based on: | ||
| 414 | * - the field size stored in the DS configuration | ||
| 415 | * - the relative field position | ||
| 416 | * | ||
| 417 | * In order to store additional information in the BTS buffer, we use | ||
| 418 | * a special source address to indicate that the record requires | ||
| 419 | * special interpretation. | ||
| 420 | * | ||
| 421 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 422 | * was predicted; this is ignored. | ||
| 423 | * | ||
| 424 | * We use two levels of abstraction: | ||
| 425 | * - the raw data level defined here | ||
| 426 | * - an arch-independent level defined in ds.h | ||
| 427 | */ | ||
| 428 | |||
| 429 | enum bts_field { | ||
| 430 | bts_from, | ||
| 431 | bts_to, | ||
| 432 | bts_flags, | ||
| 433 | |||
| 434 | bts_qual = bts_from, | ||
| 435 | bts_jiffies = bts_to, | ||
| 436 | bts_pid = bts_flags, | ||
| 437 | |||
| 438 | bts_qual_mask = (bts_qual_max - 1), | ||
| 439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
| 440 | }; | ||
| 441 | |||
| 442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
| 375 | { | 443 | { |
| 376 | struct ds_context *context; | 444 | base += (ds_cfg.sizeof_field * field); |
| 377 | unsigned long buffer, adj; | 445 | return *(unsigned long *)base; |
| 378 | const unsigned long alignment = (1 << 3); | 446 | } |
| 379 | unsigned long irq; | 447 | |
| 380 | int error = 0; | 448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
| 449 | { | ||
| 450 | base += (ds_cfg.sizeof_field * field);; | ||
| 451 | (*(unsigned long *)base) = val; | ||
| 452 | } | ||
| 381 | 453 | ||
| 382 | if (!ds_cfg.sizeof_ds) | ||
| 383 | return -EOPNOTSUPP; | ||
| 384 | 454 | ||
| 385 | /* we require some space to do alignment adjustments below */ | 455 | /* |
| 386 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | 456 | * The raw BTS data is architecture dependent. |
| 457 | * | ||
| 458 | * For higher-level users, we give an arch-independent view. | ||
| 459 | * - ds.h defines struct bts_struct | ||
| 460 | * - bts_read translates one raw bts record into a bts_struct | ||
| 461 | * - bts_write translates one bts_struct into the raw format and | ||
| 462 | * writes it into the top of the parameter tracer's buffer. | ||
| 463 | * | ||
| 464 | * return: bytes read/written on success; -Eerrno, otherwise | ||
| 465 | */ | ||
| 466 | static int bts_read(struct bts_tracer *tracer, const void *at, | ||
| 467 | struct bts_struct *out) | ||
| 468 | { | ||
| 469 | if (!tracer) | ||
| 387 | return -EINVAL; | 470 | return -EINVAL; |
| 388 | 471 | ||
| 389 | /* buffer overflow notification is not yet implemented */ | 472 | if (at < tracer->trace.ds.begin) |
| 390 | if (ovfl) | 473 | return -EINVAL; |
| 391 | return -EOPNOTSUPP; | ||
| 392 | 474 | ||
| 475 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
| 476 | return -EINVAL; | ||
| 393 | 477 | ||
| 394 | context = ds_alloc_context(task); | 478 | memset(out, 0, sizeof(*out)); |
| 395 | if (!context) | 479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
| 396 | return -ENOMEM; | 480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
| 481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | ||
| 482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | ||
| 483 | } else { | ||
| 484 | out->qualifier = bts_branch; | ||
| 485 | out->variant.lbr.from = bts_get(at, bts_from); | ||
| 486 | out->variant.lbr.to = bts_get(at, bts_to); | ||
| 487 | |||
| 488 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
| 489 | out->qualifier = bts_invalid; | ||
| 490 | } | ||
| 397 | 491 | ||
| 398 | spin_lock_irqsave(&ds_lock, irq); | 492 | return ds_cfg.sizeof_rec[ds_bts]; |
| 493 | } | ||
| 399 | 494 | ||
| 400 | error = -EPERM; | 495 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) |
| 401 | if (!check_tracer(task)) | 496 | { |
| 402 | goto out_unlock; | 497 | unsigned char raw[MAX_SIZEOF_BTS]; |
| 403 | 498 | ||
| 404 | get_tracer(task); | 499 | if (!tracer) |
| 500 | return -EINVAL; | ||
| 405 | 501 | ||
| 406 | error = -EALREADY; | 502 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) |
| 407 | if (context->owner[qual] == current) | 503 | return -EOVERFLOW; |
| 408 | goto out_put_tracer; | ||
| 409 | error = -EPERM; | ||
| 410 | if (context->owner[qual] != NULL) | ||
| 411 | goto out_put_tracer; | ||
| 412 | context->owner[qual] = current; | ||
| 413 | 504 | ||
| 414 | spin_unlock_irqrestore(&ds_lock, irq); | 505 | switch (in->qualifier) { |
| 506 | case bts_invalid: | ||
| 507 | bts_set(raw, bts_from, 0); | ||
| 508 | bts_set(raw, bts_to, 0); | ||
| 509 | bts_set(raw, bts_flags, 0); | ||
| 510 | break; | ||
| 511 | case bts_branch: | ||
| 512 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
| 513 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
| 514 | bts_set(raw, bts_flags, 0); | ||
| 515 | break; | ||
| 516 | case bts_task_arrives: | ||
| 517 | case bts_task_departs: | ||
| 518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
| 519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | ||
| 520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | ||
| 521 | break; | ||
| 522 | default: | ||
| 523 | return -EINVAL; | ||
| 524 | } | ||
| 415 | 525 | ||
| 526 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
| 527 | ds_cfg.sizeof_rec[ds_bts]); | ||
| 528 | } | ||
| 416 | 529 | ||
| 417 | error = -ENOMEM; | ||
| 418 | if (!base) { | ||
| 419 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
| 420 | if (!base) | ||
| 421 | goto out_release; | ||
| 422 | 530 | ||
| 423 | context->buffer[qual] = base; | 531 | static void ds_write_config(struct ds_context *context, |
| 424 | } | 532 | struct ds_trace *cfg, enum ds_qualifier qual) |
| 425 | error = 0; | 533 | { |
| 534 | unsigned char *ds = context->ds; | ||
| 535 | |||
| 536 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
| 537 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
| 538 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
| 539 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
| 540 | } | ||
| 541 | |||
| 542 | static void ds_read_config(struct ds_context *context, | ||
| 543 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
| 544 | { | ||
| 545 | unsigned char *ds = context->ds; | ||
| 426 | 546 | ||
| 427 | context->callback[qual] = ovfl; | 547 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); |
| 548 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
| 549 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
| 550 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
| 551 | } | ||
| 552 | |||
| 553 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
| 554 | void *base, size_t size, size_t ith, | ||
| 555 | unsigned int flags) { | ||
| 556 | unsigned long buffer, adj; | ||
| 428 | 557 | ||
| 429 | /* adjust the buffer address and size to meet alignment | 558 | /* adjust the buffer address and size to meet alignment |
| 430 | * constraints: | 559 | * constraints: |
| @@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size, | |||
| 436 | */ | 565 | */ |
| 437 | buffer = (unsigned long)base; | 566 | buffer = (unsigned long)base; |
| 438 | 567 | ||
| 439 | adj = ALIGN(buffer, alignment) - buffer; | 568 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; |
| 440 | buffer += adj; | 569 | buffer += adj; |
| 441 | size -= adj; | 570 | size -= adj; |
| 442 | 571 | ||
| 443 | size /= ds_cfg.sizeof_rec[qual]; | 572 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
| 444 | size *= ds_cfg.sizeof_rec[qual]; | 573 | trace->size = ds_cfg.sizeof_rec[qual]; |
| 445 | |||
| 446 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
| 447 | ds_set(context->ds, qual, ds_index, buffer); | ||
| 448 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
| 449 | 574 | ||
| 450 | if (ovfl) { | 575 | size = (trace->n * trace->size); |
| 451 | /* todo: select a suitable interrupt threshold */ | ||
| 452 | } else | ||
| 453 | ds_set(context->ds, qual, | ||
| 454 | ds_interrupt_threshold, buffer + size + 1); | ||
| 455 | 576 | ||
| 456 | /* we keep the context until ds_release */ | 577 | trace->begin = (void *)buffer; |
| 457 | return error; | 578 | trace->top = trace->begin; |
| 458 | 579 | trace->end = (void *)(buffer + size); | |
| 459 | out_release: | 580 | /* The value for 'no threshold' is -1, which will set the |
| 460 | context->owner[qual] = NULL; | 581 | * threshold outside of the buffer, just like we want it. |
| 461 | ds_put_context(context); | 582 | */ |
| 462 | put_tracer(task); | 583 | trace->ith = (void *)(buffer + size - ith); |
| 463 | return error; | ||
| 464 | |||
| 465 | out_put_tracer: | ||
| 466 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 467 | ds_put_context(context); | ||
| 468 | put_tracer(task); | ||
| 469 | return error; | ||
| 470 | 584 | ||
| 471 | out_unlock: | 585 | trace->flags = flags; |
| 472 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 473 | ds_put_context(context); | ||
| 474 | return error; | ||
| 475 | } | 586 | } |
| 476 | 587 | ||
| 477 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
| 478 | ds_ovfl_callback_t ovfl) | ||
| 479 | { | ||
| 480 | return ds_request(task, base, size, ovfl, ds_bts); | ||
| 481 | } | ||
| 482 | 588 | ||
| 483 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | 589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
| 484 | ds_ovfl_callback_t ovfl) | 590 | enum ds_qualifier qual, struct task_struct *task, |
| 485 | { | 591 | void *base, size_t size, size_t th, unsigned int flags) |
| 486 | return ds_request(task, base, size, ovfl, ds_pebs); | ||
| 487 | } | ||
| 488 | |||
| 489 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
| 490 | { | 592 | { |
| 491 | struct ds_context *context; | 593 | struct ds_context *context; |
| 492 | int error; | 594 | int error; |
| 493 | 595 | ||
| 494 | context = ds_get_context(task); | 596 | error = -EINVAL; |
| 495 | error = ds_validate_access(context, qual); | 597 | if (!base) |
| 496 | if (error < 0) | ||
| 497 | goto out; | 598 | goto out; |
| 498 | 599 | ||
| 499 | kfree(context->buffer[qual]); | 600 | /* we require some space to do alignment adjustments below */ |
| 500 | context->buffer[qual] = NULL; | 601 | error = -EINVAL; |
| 501 | 602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
| 502 | current->mm->total_vm -= context->pages[qual]; | 603 | goto out; |
| 503 | current->mm->locked_vm -= context->pages[qual]; | ||
| 504 | context->pages[qual] = 0; | ||
| 505 | context->owner[qual] = NULL; | ||
| 506 | |||
| 507 | /* | ||
| 508 | * we put the context twice: | ||
| 509 | * once for the ds_get_context | ||
| 510 | * once for the corresponding ds_request | ||
| 511 | */ | ||
| 512 | ds_put_context(context); | ||
| 513 | out: | ||
| 514 | ds_put_context(context); | ||
| 515 | return error; | ||
| 516 | } | ||
| 517 | 604 | ||
| 518 | int ds_release_bts(struct task_struct *task) | 605 | if (th != (size_t)-1) { |
| 519 | { | 606 | th *= ds_cfg.sizeof_rec[qual]; |
| 520 | return ds_release(task, ds_bts); | ||
| 521 | } | ||
| 522 | 607 | ||
| 523 | int ds_release_pebs(struct task_struct *task) | 608 | error = -EINVAL; |
| 524 | { | 609 | if (size <= th) |
| 525 | return ds_release(task, ds_pebs); | 610 | goto out; |
| 526 | } | 611 | } |
| 527 | 612 | ||
| 528 | static int ds_get_index(struct task_struct *task, size_t *pos, | 613 | tracer->buffer = base; |
| 529 | enum ds_qualifier qual) | 614 | tracer->size = size; |
| 530 | { | ||
| 531 | struct ds_context *context; | ||
| 532 | unsigned long base, index; | ||
| 533 | int error; | ||
| 534 | 615 | ||
| 616 | error = -ENOMEM; | ||
| 535 | context = ds_get_context(task); | 617 | context = ds_get_context(task); |
| 536 | error = ds_validate_access(context, qual); | 618 | if (!context) |
| 537 | if (error < 0) | ||
| 538 | goto out; | 619 | goto out; |
| 620 | tracer->context = context; | ||
| 539 | 621 | ||
| 540 | base = ds_get(context->ds, qual, ds_buffer_base); | 622 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
| 541 | index = ds_get(context->ds, qual, ds_index); | ||
| 542 | 623 | ||
| 543 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | 624 | error = 0; |
| 544 | if (pos) | ||
| 545 | *pos = error; | ||
| 546 | out: | 625 | out: |
| 547 | ds_put_context(context); | ||
| 548 | return error; | 626 | return error; |
| 549 | } | 627 | } |
| 550 | 628 | ||
| 551 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | 629 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
| 552 | { | 630 | void *base, size_t size, |
| 553 | return ds_get_index(task, pos, ds_bts); | 631 | bts_ovfl_callback_t ovfl, size_t th, |
| 554 | } | 632 | unsigned int flags) |
| 555 | |||
| 556 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
| 557 | { | 633 | { |
| 558 | return ds_get_index(task, pos, ds_pebs); | 634 | struct bts_tracer *tracer; |
| 559 | } | 635 | unsigned long irq; |
| 560 | |||
| 561 | static int ds_get_end(struct task_struct *task, size_t *pos, | ||
| 562 | enum ds_qualifier qual) | ||
| 563 | { | ||
| 564 | struct ds_context *context; | ||
| 565 | unsigned long base, end; | ||
| 566 | int error; | 636 | int error; |
| 567 | 637 | ||
| 568 | context = ds_get_context(task); | 638 | error = -EOPNOTSUPP; |
| 569 | error = ds_validate_access(context, qual); | 639 | if (!ds_cfg.ctl[dsf_bts]) |
| 570 | if (error < 0) | ||
| 571 | goto out; | 640 | goto out; |
| 572 | 641 | ||
| 573 | base = ds_get(context->ds, qual, ds_buffer_base); | 642 | /* buffer overflow notification is not yet implemented */ |
| 574 | end = ds_get(context->ds, qual, ds_absolute_maximum); | 643 | error = -EOPNOTSUPP; |
| 644 | if (ovfl) | ||
| 645 | goto out; | ||
| 575 | 646 | ||
| 576 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | 647 | error = -ENOMEM; |
| 577 | if (pos) | 648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
| 578 | *pos = error; | 649 | if (!tracer) |
| 579 | out: | 650 | goto out; |
| 580 | ds_put_context(context); | 651 | tracer->ovfl = ovfl; |
| 581 | return error; | ||
| 582 | } | ||
| 583 | 652 | ||
| 584 | int ds_get_bts_end(struct task_struct *task, size_t *pos) | 653 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
| 585 | { | 654 | ds_bts, task, base, size, th, flags); |
| 586 | return ds_get_end(task, pos, ds_bts); | 655 | if (error < 0) |
| 587 | } | 656 | goto out_tracer; |
| 588 | 657 | ||
| 589 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) | ||
| 590 | { | ||
| 591 | return ds_get_end(task, pos, ds_pebs); | ||
| 592 | } | ||
| 593 | 658 | ||
| 594 | static int ds_access(struct task_struct *task, size_t index, | 659 | spin_lock_irqsave(&ds_lock, irq); |
| 595 | const void **record, enum ds_qualifier qual) | ||
| 596 | { | ||
| 597 | struct ds_context *context; | ||
| 598 | unsigned long base, idx; | ||
| 599 | int error; | ||
| 600 | 660 | ||
| 601 | if (!record) | 661 | error = -EPERM; |
| 602 | return -EINVAL; | 662 | if (!check_tracer(task)) |
| 663 | goto out_unlock; | ||
| 664 | get_tracer(task); | ||
| 603 | 665 | ||
| 604 | context = ds_get_context(task); | 666 | error = -EPERM; |
| 605 | error = ds_validate_access(context, qual); | 667 | if (tracer->ds.context->bts_master) |
| 606 | if (error < 0) | 668 | goto out_put_tracer; |
| 607 | goto out; | 669 | tracer->ds.context->bts_master = tracer; |
| 608 | 670 | ||
| 609 | base = ds_get(context->ds, qual, ds_buffer_base); | 671 | spin_unlock_irqrestore(&ds_lock, irq); |
| 610 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
| 611 | 672 | ||
| 612 | error = -EINVAL; | ||
| 613 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) | ||
| 614 | goto out; | ||
| 615 | 673 | ||
| 616 | *record = (const void *)idx; | 674 | tracer->trace.read = bts_read; |
| 617 | error = ds_cfg.sizeof_rec[qual]; | 675 | tracer->trace.write = bts_write; |
| 618 | out: | ||
| 619 | ds_put_context(context); | ||
| 620 | return error; | ||
| 621 | } | ||
| 622 | 676 | ||
| 623 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) | 677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
| 624 | { | 678 | ds_resume_bts(tracer); |
| 625 | return ds_access(task, index, record, ds_bts); | ||
| 626 | } | ||
| 627 | 679 | ||
| 628 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) | 680 | return tracer; |
| 629 | { | 681 | |
| 630 | return ds_access(task, index, record, ds_pebs); | 682 | out_put_tracer: |
| 683 | put_tracer(task); | ||
| 684 | out_unlock: | ||
| 685 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 686 | ds_put_context(tracer->ds.context); | ||
| 687 | out_tracer: | ||
| 688 | kfree(tracer); | ||
| 689 | out: | ||
| 690 | return ERR_PTR(error); | ||
| 631 | } | 691 | } |
| 632 | 692 | ||
| 633 | static int ds_write(struct task_struct *task, const void *record, size_t size, | 693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
| 634 | enum ds_qualifier qual, int force) | 694 | void *base, size_t size, |
| 695 | pebs_ovfl_callback_t ovfl, size_t th, | ||
| 696 | unsigned int flags) | ||
| 635 | { | 697 | { |
| 636 | struct ds_context *context; | 698 | struct pebs_tracer *tracer; |
| 699 | unsigned long irq; | ||
| 637 | int error; | 700 | int error; |
| 638 | 701 | ||
| 639 | if (!record) | 702 | /* buffer overflow notification is not yet implemented */ |
| 640 | return -EINVAL; | 703 | error = -EOPNOTSUPP; |
| 704 | if (ovfl) | ||
| 705 | goto out; | ||
| 641 | 706 | ||
| 642 | error = -EPERM; | 707 | error = -ENOMEM; |
| 643 | context = ds_get_context(task); | 708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
| 644 | if (!context) | 709 | if (!tracer) |
| 645 | goto out; | 710 | goto out; |
| 711 | tracer->ovfl = ovfl; | ||
| 646 | 712 | ||
| 647 | if (!force) { | 713 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
| 648 | error = ds_validate_access(context, qual); | 714 | ds_pebs, task, base, size, th, flags); |
| 649 | if (error < 0) | 715 | if (error < 0) |
| 650 | goto out; | 716 | goto out_tracer; |
| 651 | } | ||
| 652 | 717 | ||
| 653 | error = 0; | 718 | spin_lock_irqsave(&ds_lock, irq); |
| 654 | while (size) { | ||
| 655 | unsigned long base, index, end, write_end, int_th; | ||
| 656 | unsigned long write_size, adj_write_size; | ||
| 657 | 719 | ||
| 658 | /* | 720 | error = -EPERM; |
| 659 | * write as much as possible without producing an | 721 | if (!check_tracer(task)) |
| 660 | * overflow interrupt. | 722 | goto out_unlock; |
| 661 | * | 723 | get_tracer(task); |
| 662 | * interrupt_threshold must either be | ||
| 663 | * - bigger than absolute_maximum or | ||
| 664 | * - point to a record between buffer_base and absolute_maximum | ||
| 665 | * | ||
| 666 | * index points to a valid record. | ||
| 667 | */ | ||
| 668 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 669 | index = ds_get(context->ds, qual, ds_index); | ||
| 670 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 671 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 672 | 724 | ||
| 673 | write_end = min(end, int_th); | 725 | error = -EPERM; |
| 726 | if (tracer->ds.context->pebs_master) | ||
| 727 | goto out_put_tracer; | ||
| 728 | tracer->ds.context->pebs_master = tracer; | ||
| 674 | 729 | ||
| 675 | /* if we are already beyond the interrupt threshold, | 730 | spin_unlock_irqrestore(&ds_lock, irq); |
| 676 | * we fill the entire buffer */ | ||
| 677 | if (write_end <= index) | ||
| 678 | write_end = end; | ||
| 679 | 731 | ||
| 680 | if (write_end <= index) | 732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
| 681 | goto out; | 733 | ds_resume_pebs(tracer); |
| 682 | 734 | ||
| 683 | write_size = min((unsigned long) size, write_end - index); | 735 | return tracer; |
| 684 | memcpy((void *)index, record, write_size); | ||
| 685 | 736 | ||
| 686 | record = (const char *)record + write_size; | 737 | out_put_tracer: |
| 687 | size -= write_size; | 738 | put_tracer(task); |
| 688 | error += write_size; | 739 | out_unlock: |
| 740 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 741 | ds_put_context(tracer->ds.context); | ||
| 742 | out_tracer: | ||
| 743 | kfree(tracer); | ||
| 744 | out: | ||
| 745 | return ERR_PTR(error); | ||
| 746 | } | ||
| 689 | 747 | ||
| 690 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 748 | void ds_release_bts(struct bts_tracer *tracer) |
| 691 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 749 | { |
| 750 | if (!tracer) | ||
| 751 | return; | ||
| 692 | 752 | ||
| 693 | /* zero out trailing bytes */ | 753 | ds_suspend_bts(tracer); |
| 694 | memset((char *)index + write_size, 0, | ||
| 695 | adj_write_size - write_size); | ||
| 696 | index += adj_write_size; | ||
| 697 | 754 | ||
| 698 | if (index >= end) | 755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
| 699 | index = base; | 756 | tracer->ds.context->bts_master = NULL; |
| 700 | ds_set(context->ds, qual, ds_index, index); | ||
| 701 | 757 | ||
| 702 | if (index >= int_th) | 758 | put_tracer(tracer->ds.context->task); |
| 703 | ds_overflow(task, context, qual); | 759 | ds_put_context(tracer->ds.context); |
| 704 | } | ||
| 705 | 760 | ||
| 706 | out: | 761 | kfree(tracer); |
| 707 | ds_put_context(context); | ||
| 708 | return error; | ||
| 709 | } | 762 | } |
| 710 | 763 | ||
| 711 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) | 764 | void ds_suspend_bts(struct bts_tracer *tracer) |
| 712 | { | 765 | { |
| 713 | return ds_write(task, record, size, ds_bts, /* force = */ 0); | 766 | struct task_struct *task; |
| 714 | } | ||
| 715 | 767 | ||
| 716 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) | 768 | if (!tracer) |
| 717 | { | 769 | return; |
| 718 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); | ||
| 719 | } | ||
| 720 | 770 | ||
| 721 | int ds_unchecked_write_bts(struct task_struct *task, | 771 | task = tracer->ds.context->task; |
| 722 | const void *record, size_t size) | ||
| 723 | { | ||
| 724 | return ds_write(task, record, size, ds_bts, /* force = */ 1); | ||
| 725 | } | ||
| 726 | 772 | ||
| 727 | int ds_unchecked_write_pebs(struct task_struct *task, | 773 | if (!task || (task == current)) |
| 728 | const void *record, size_t size) | 774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); |
| 729 | { | 775 | |
| 730 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); | 776 | if (task) { |
| 777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | ||
| 778 | |||
| 779 | if (!task->thread.debugctlmsr) | ||
| 780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
| 781 | } | ||
| 731 | } | 782 | } |
| 732 | 783 | ||
| 733 | static int ds_reset_or_clear(struct task_struct *task, | 784 | void ds_resume_bts(struct bts_tracer *tracer) |
| 734 | enum ds_qualifier qual, int clear) | ||
| 735 | { | 785 | { |
| 736 | struct ds_context *context; | 786 | struct task_struct *task; |
| 737 | unsigned long base, end; | 787 | unsigned long control; |
| 738 | int error; | ||
| 739 | 788 | ||
| 740 | context = ds_get_context(task); | 789 | if (!tracer) |
| 741 | error = ds_validate_access(context, qual); | 790 | return; |
| 742 | if (error < 0) | ||
| 743 | goto out; | ||
| 744 | 791 | ||
| 745 | base = ds_get(context->ds, qual, ds_buffer_base); | 792 | task = tracer->ds.context->task; |
| 746 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 747 | 793 | ||
| 748 | if (clear) | 794 | control = ds_cfg.ctl[dsf_bts]; |
| 749 | memset((void *)base, 0, end - base); | 795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
| 796 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
| 797 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
| 798 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
| 750 | 799 | ||
| 751 | ds_set(context->ds, qual, ds_index, base); | 800 | if (task) { |
| 801 | task->thread.debugctlmsr |= control; | ||
| 802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
| 803 | } | ||
| 752 | 804 | ||
| 753 | error = 0; | 805 | if (!task || (task == current)) |
| 754 | out: | 806 | update_debugctlmsr(get_debugctlmsr() | control); |
| 755 | ds_put_context(context); | ||
| 756 | return error; | ||
| 757 | } | 807 | } |
| 758 | 808 | ||
| 759 | int ds_reset_bts(struct task_struct *task) | 809 | void ds_release_pebs(struct pebs_tracer *tracer) |
| 760 | { | 810 | { |
| 761 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); | 811 | if (!tracer) |
| 812 | return; | ||
| 813 | |||
| 814 | ds_suspend_pebs(tracer); | ||
| 815 | |||
| 816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
| 817 | tracer->ds.context->pebs_master = NULL; | ||
| 818 | |||
| 819 | put_tracer(tracer->ds.context->task); | ||
| 820 | ds_put_context(tracer->ds.context); | ||
| 821 | |||
| 822 | kfree(tracer); | ||
| 762 | } | 823 | } |
| 763 | 824 | ||
| 764 | int ds_reset_pebs(struct task_struct *task) | 825 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
| 765 | { | 826 | { |
| 766 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | 827 | |
| 767 | } | 828 | } |
| 768 | 829 | ||
| 769 | int ds_clear_bts(struct task_struct *task) | 830 | void ds_resume_pebs(struct pebs_tracer *tracer) |
| 770 | { | 831 | { |
| 771 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); | 832 | |
| 772 | } | 833 | } |
| 773 | 834 | ||
| 774 | int ds_clear_pebs(struct task_struct *task) | 835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
| 775 | { | 836 | { |
| 776 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); | 837 | if (!tracer) |
| 838 | return NULL; | ||
| 839 | |||
| 840 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
| 841 | return &tracer->trace; | ||
| 777 | } | 842 | } |
| 778 | 843 | ||
| 779 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) | 844 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
| 780 | { | 845 | { |
| 781 | struct ds_context *context; | 846 | if (!tracer) |
| 782 | int error; | 847 | return NULL; |
| 848 | |||
| 849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
| 850 | tracer->trace.reset_value = | ||
| 851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | ||
| 783 | 852 | ||
| 784 | if (!value) | 853 | return &tracer->trace; |
| 854 | } | ||
| 855 | |||
| 856 | int ds_reset_bts(struct bts_tracer *tracer) | ||
| 857 | { | ||
| 858 | if (!tracer) | ||
| 785 | return -EINVAL; | 859 | return -EINVAL; |
| 786 | 860 | ||
| 787 | context = ds_get_context(task); | 861 | tracer->trace.ds.top = tracer->trace.ds.begin; |
| 788 | error = ds_validate_access(context, ds_pebs); | ||
| 789 | if (error < 0) | ||
| 790 | goto out; | ||
| 791 | 862 | ||
| 792 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); | 863 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
| 864 | (unsigned long)tracer->trace.ds.top); | ||
| 793 | 865 | ||
| 794 | error = 0; | 866 | return 0; |
| 795 | out: | ||
| 796 | ds_put_context(context); | ||
| 797 | return error; | ||
| 798 | } | 867 | } |
| 799 | 868 | ||
| 800 | int ds_set_pebs_reset(struct task_struct *task, u64 value) | 869 | int ds_reset_pebs(struct pebs_tracer *tracer) |
| 801 | { | 870 | { |
| 802 | struct ds_context *context; | 871 | if (!tracer) |
| 803 | int error; | 872 | return -EINVAL; |
| 804 | 873 | ||
| 805 | context = ds_get_context(task); | 874 | tracer->trace.ds.top = tracer->trace.ds.begin; |
| 806 | error = ds_validate_access(context, ds_pebs); | ||
| 807 | if (error < 0) | ||
| 808 | goto out; | ||
| 809 | 875 | ||
| 810 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; | 876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
| 877 | (unsigned long)tracer->trace.ds.top); | ||
| 811 | 878 | ||
| 812 | error = 0; | 879 | return 0; |
| 813 | out: | 880 | } |
| 814 | ds_put_context(context); | 881 | |
| 815 | return error; | 882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
| 883 | { | ||
| 884 | if (!tracer) | ||
| 885 | return -EINVAL; | ||
| 886 | |||
| 887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | ||
| 888 | |||
| 889 | return 0; | ||
| 816 | } | 890 | } |
| 817 | 891 | ||
| 818 | static const struct ds_configuration ds_cfg_var = { | 892 | static const struct ds_configuration ds_cfg_netburst = { |
| 819 | .sizeof_ds = sizeof(long) * 12, | 893 | .name = "netburst", |
| 820 | .sizeof_field = sizeof(long), | 894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
| 821 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | 895 | .ctl[dsf_bts_kernel] = (1 << 5), |
| 896 | .ctl[dsf_bts_user] = (1 << 6), | ||
| 897 | |||
| 898 | .sizeof_field = sizeof(long), | ||
| 899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
| 822 | #ifdef __i386__ | 900 | #ifdef __i386__ |
| 823 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | 901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
| 824 | #else | 902 | #else |
| 825 | .sizeof_rec[ds_pebs] = sizeof(long) * 18 | 903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
| 826 | #endif | 904 | #endif |
| 827 | }; | 905 | }; |
| 828 | static const struct ds_configuration ds_cfg_64 = { | 906 | static const struct ds_configuration ds_cfg_pentium_m = { |
| 829 | .sizeof_ds = 8 * 12, | 907 | .name = "pentium m", |
| 830 | .sizeof_field = 8, | 908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
| 831 | .sizeof_rec[ds_bts] = 8 * 3, | 909 | |
| 910 | .sizeof_field = sizeof(long), | ||
| 911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
| 832 | #ifdef __i386__ | 912 | #ifdef __i386__ |
| 833 | .sizeof_rec[ds_pebs] = 8 * 10 | 913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
| 834 | #else | 914 | #else |
| 835 | .sizeof_rec[ds_pebs] = 8 * 18 | 915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
| 836 | #endif | 916 | #endif |
| 837 | }; | 917 | }; |
| 918 | static const struct ds_configuration ds_cfg_core2 = { | ||
| 919 | .name = "core 2", | ||
| 920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 921 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
| 922 | .ctl[dsf_bts_user] = (1 << 10), | ||
| 923 | |||
| 924 | .sizeof_field = 8, | ||
| 925 | .sizeof_rec[ds_bts] = 8 * 3, | ||
| 926 | .sizeof_rec[ds_pebs] = 8 * 18, | ||
| 927 | }; | ||
| 838 | 928 | ||
| 839 | static inline void | 929 | static void |
| 840 | ds_configure(const struct ds_configuration *cfg) | 930 | ds_configure(const struct ds_configuration *cfg) |
| 841 | { | 931 | { |
| 932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
| 842 | ds_cfg = *cfg; | 933 | ds_cfg = *cfg; |
| 934 | |||
| 935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | ||
| 936 | |||
| 937 | if (!cpu_has_bts) { | ||
| 938 | ds_cfg.ctl[dsf_bts] = 0; | ||
| 939 | printk(KERN_INFO "[ds] bts not available\n"); | ||
| 940 | } | ||
| 941 | if (!cpu_has_pebs) | ||
| 942 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
| 943 | |||
| 944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | ||
| 843 | } | 945 | } |
| 844 | 946 | ||
| 845 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
| @@ -847,16 +949,15 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 847 | switch (c->x86) { | 949 | switch (c->x86) { |
| 848 | case 0x6: | 950 | case 0x6: |
| 849 | switch (c->x86_model) { | 951 | switch (c->x86_model) { |
| 952 | case 0 ... 0xC: | ||
| 953 | /* sorry, don't know about them */ | ||
| 954 | break; | ||
| 850 | case 0xD: | 955 | case 0xD: |
| 851 | case 0xE: /* Pentium M */ | 956 | case 0xE: /* Pentium M */ |
| 852 | ds_configure(&ds_cfg_var); | 957 | ds_configure(&ds_cfg_pentium_m); |
| 853 | break; | 958 | break; |
| 854 | case 0xF: /* Core2 */ | 959 | default: /* Core2, Atom, ... */ |
| 855 | case 0x1C: /* Atom */ | 960 | ds_configure(&ds_cfg_core2); |
| 856 | ds_configure(&ds_cfg_64); | ||
| 857 | break; | ||
| 858 | default: | ||
| 859 | /* sorry, don't know about them */ | ||
| 860 | break; | 961 | break; |
| 861 | } | 962 | } |
| 862 | break; | 963 | break; |
| @@ -865,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 865 | case 0x0: | 966 | case 0x0: |
| 866 | case 0x1: | 967 | case 0x1: |
| 867 | case 0x2: /* Netburst */ | 968 | case 0x2: /* Netburst */ |
| 868 | ds_configure(&ds_cfg_var); | 969 | ds_configure(&ds_cfg_netburst); |
| 869 | break; | 970 | break; |
| 870 | default: | 971 | default: |
| 871 | /* sorry, don't know about them */ | 972 | /* sorry, don't know about them */ |
| @@ -878,12 +979,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 878 | } | 979 | } |
| 879 | } | 980 | } |
| 880 | 981 | ||
| 881 | void ds_free(struct ds_context *context) | 982 | /* |
| 983 | * Change the DS configuration from tracing prev to tracing next. | ||
| 984 | */ | ||
| 985 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
| 986 | { | ||
| 987 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
| 988 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
| 989 | |||
| 990 | if (prev_ctx) { | ||
| 991 | update_debugctlmsr(0); | ||
| 992 | |||
| 993 | if (prev_ctx->bts_master && | ||
| 994 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
| 995 | struct bts_struct ts = { | ||
| 996 | .qualifier = bts_task_departs, | ||
| 997 | .variant.timestamp.jiffies = jiffies_64, | ||
| 998 | .variant.timestamp.pid = prev->pid | ||
| 999 | }; | ||
| 1000 | bts_write(prev_ctx->bts_master, &ts); | ||
| 1001 | } | ||
| 1002 | } | ||
| 1003 | |||
| 1004 | if (next_ctx) { | ||
| 1005 | if (next_ctx->bts_master && | ||
| 1006 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
| 1007 | struct bts_struct ts = { | ||
| 1008 | .qualifier = bts_task_arrives, | ||
| 1009 | .variant.timestamp.jiffies = jiffies_64, | ||
| 1010 | .variant.timestamp.pid = next->pid | ||
| 1011 | }; | ||
| 1012 | bts_write(next_ctx->bts_master, &ts); | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | update_debugctlmsr(next->thread.debugctlmsr); | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | ||
| 1022 | { | ||
| 1023 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | ||
| 1024 | tsk->thread.ds_ctx = NULL; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | void ds_exit_thread(struct task_struct *tsk) | ||
| 882 | { | 1028 | { |
| 883 | /* This is called when the task owning the parameter context | 1029 | WARN_ON(tsk->thread.ds_ctx); |
| 884 | * is dying. There should not be any user of that context left | ||
| 885 | * to disturb us, anymore. */ | ||
| 886 | unsigned long leftovers = context->count; | ||
| 887 | while (leftovers--) | ||
| 888 | ds_put_context(context); | ||
| 889 | } | 1030 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..6b1f6f6f8661 --- /dev/null +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -0,0 +1,351 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | */ | ||
| 5 | #include <linux/kallsyms.h> | ||
| 6 | #include <linux/kprobes.h> | ||
| 7 | #include <linux/uaccess.h> | ||
| 8 | #include <linux/utsname.h> | ||
| 9 | #include <linux/hardirq.h> | ||
| 10 | #include <linux/kdebug.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/ptrace.h> | ||
| 13 | #include <linux/kexec.h> | ||
| 14 | #include <linux/bug.h> | ||
| 15 | #include <linux/nmi.h> | ||
| 16 | #include <linux/sysfs.h> | ||
| 17 | |||
| 18 | #include <asm/stacktrace.h> | ||
| 19 | |||
| 20 | #include "dumpstack.h" | ||
| 21 | |||
| 22 | int panic_on_unrecovered_nmi; | ||
| 23 | unsigned int code_bytes = 64; | ||
| 24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
| 25 | static int die_counter; | ||
| 26 | |||
| 27 | void printk_address(unsigned long address, int reliable) | ||
| 28 | { | ||
| 29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
| 30 | reliable ? "" : "? ", (void *) address); | ||
| 31 | } | ||
| 32 | |||
| 33 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 34 | static void | ||
| 35 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
| 36 | const struct stacktrace_ops *ops, | ||
| 37 | struct thread_info *tinfo, int *graph) | ||
| 38 | { | ||
| 39 | struct task_struct *task = tinfo->task; | ||
| 40 | unsigned long ret_addr; | ||
| 41 | int index = task->curr_ret_stack; | ||
| 42 | |||
| 43 | if (addr != (unsigned long)return_to_handler) | ||
| 44 | return; | ||
| 45 | |||
| 46 | if (!task->ret_stack || index < *graph) | ||
| 47 | return; | ||
| 48 | |||
| 49 | index -= *graph; | ||
| 50 | ret_addr = task->ret_stack[index].ret; | ||
| 51 | |||
| 52 | ops->address(data, ret_addr, 1); | ||
| 53 | |||
| 54 | (*graph)++; | ||
| 55 | } | ||
| 56 | #else | ||
| 57 | static inline void | ||
| 58 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
| 59 | const struct stacktrace_ops *ops, | ||
| 60 | struct thread_info *tinfo, int *graph) | ||
| 61 | { } | ||
| 62 | #endif | ||
| 63 | |||
| 64 | /* | ||
| 65 | * x86-64 can have up to three kernel stacks: | ||
| 66 | * process stack | ||
| 67 | * interrupt stack | ||
| 68 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
| 69 | */ | ||
| 70 | |||
| 71 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 72 | void *p, unsigned int size, void *end) | ||
| 73 | { | ||
| 74 | void *t = tinfo; | ||
| 75 | if (end) { | ||
| 76 | if (p < end && p >= (end-THREAD_SIZE)) | ||
| 77 | return 1; | ||
| 78 | else | ||
| 79 | return 0; | ||
| 80 | } | ||
| 81 | return p > t && p < t + THREAD_SIZE - size; | ||
| 82 | } | ||
| 83 | |||
| 84 | unsigned long | ||
| 85 | print_context_stack(struct thread_info *tinfo, | ||
| 86 | unsigned long *stack, unsigned long bp, | ||
| 87 | const struct stacktrace_ops *ops, void *data, | ||
| 88 | unsigned long *end, int *graph) | ||
| 89 | { | ||
| 90 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 91 | |||
| 92 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
| 93 | unsigned long addr; | ||
| 94 | |||
| 95 | addr = *stack; | ||
| 96 | if (__kernel_text_address(addr)) { | ||
| 97 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
| 98 | ops->address(data, addr, 1); | ||
| 99 | frame = frame->next_frame; | ||
| 100 | bp = (unsigned long) frame; | ||
| 101 | } else { | ||
| 102 | ops->address(data, addr, bp == 0); | ||
| 103 | } | ||
| 104 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
| 105 | } | ||
| 106 | stack++; | ||
| 107 | } | ||
| 108 | return bp; | ||
| 109 | } | ||
| 110 | |||
| 111 | |||
| 112 | static void | ||
| 113 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 114 | { | ||
| 115 | printk(data); | ||
| 116 | print_symbol(msg, symbol); | ||
| 117 | printk("\n"); | ||
| 118 | } | ||
| 119 | |||
| 120 | static void print_trace_warning(void *data, char *msg) | ||
| 121 | { | ||
| 122 | printk("%s%s\n", (char *)data, msg); | ||
| 123 | } | ||
| 124 | |||
| 125 | static int print_trace_stack(void *data, char *name) | ||
| 126 | { | ||
| 127 | printk("%s <%s> ", (char *)data, name); | ||
| 128 | return 0; | ||
| 129 | } | ||
| 130 | |||
| 131 | /* | ||
| 132 | * Print one address/symbol entries per line. | ||
| 133 | */ | ||
| 134 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 135 | { | ||
| 136 | touch_nmi_watchdog(); | ||
| 137 | printk(data); | ||
| 138 | printk_address(addr, reliable); | ||
| 139 | } | ||
| 140 | |||
| 141 | static const struct stacktrace_ops print_trace_ops = { | ||
| 142 | .warning = print_trace_warning, | ||
| 143 | .warning_symbol = print_trace_warning_symbol, | ||
| 144 | .stack = print_trace_stack, | ||
| 145 | .address = print_trace_address, | ||
| 146 | }; | ||
| 147 | |||
| 148 | void | ||
| 149 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 150 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
| 151 | { | ||
| 152 | printk("%sCall Trace:\n", log_lvl); | ||
| 153 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
| 154 | } | ||
| 155 | |||
| 156 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 157 | unsigned long *stack, unsigned long bp) | ||
| 158 | { | ||
| 159 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
| 160 | } | ||
| 161 | |||
| 162 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
| 163 | { | ||
| 164 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
| 165 | } | ||
| 166 | |||
| 167 | /* | ||
| 168 | * The architecture-independent dump_stack generator | ||
| 169 | */ | ||
| 170 | void dump_stack(void) | ||
| 171 | { | ||
| 172 | unsigned long bp = 0; | ||
| 173 | unsigned long stack; | ||
| 174 | |||
| 175 | #ifdef CONFIG_FRAME_POINTER | ||
| 176 | if (!bp) | ||
| 177 | get_bp(bp); | ||
| 178 | #endif | ||
| 179 | |||
| 180 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 181 | current->pid, current->comm, print_tainted(), | ||
| 182 | init_utsname()->release, | ||
| 183 | (int)strcspn(init_utsname()->version, " "), | ||
| 184 | init_utsname()->version); | ||
| 185 | show_trace(NULL, NULL, &stack, bp); | ||
| 186 | } | ||
| 187 | EXPORT_SYMBOL(dump_stack); | ||
| 188 | |||
| 189 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 190 | static int die_owner = -1; | ||
| 191 | static unsigned int die_nest_count; | ||
| 192 | |||
| 193 | unsigned __kprobes long oops_begin(void) | ||
| 194 | { | ||
| 195 | int cpu; | ||
| 196 | unsigned long flags; | ||
| 197 | |||
| 198 | oops_enter(); | ||
| 199 | |||
| 200 | /* racy, but better than risking deadlock. */ | ||
| 201 | raw_local_irq_save(flags); | ||
| 202 | cpu = smp_processor_id(); | ||
| 203 | if (!__raw_spin_trylock(&die_lock)) { | ||
| 204 | if (cpu == die_owner) | ||
| 205 | /* nested oops. should stop eventually */; | ||
| 206 | else | ||
| 207 | __raw_spin_lock(&die_lock); | ||
| 208 | } | ||
| 209 | die_nest_count++; | ||
| 210 | die_owner = cpu; | ||
| 211 | console_verbose(); | ||
| 212 | bust_spinlocks(1); | ||
| 213 | return flags; | ||
| 214 | } | ||
| 215 | |||
| 216 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
| 217 | { | ||
| 218 | if (regs && kexec_should_crash(current)) | ||
| 219 | crash_kexec(regs); | ||
| 220 | |||
| 221 | bust_spinlocks(0); | ||
| 222 | die_owner = -1; | ||
| 223 | add_taint(TAINT_DIE); | ||
| 224 | die_nest_count--; | ||
| 225 | if (!die_nest_count) | ||
| 226 | /* Nest count reaches zero, release the lock. */ | ||
| 227 | __raw_spin_unlock(&die_lock); | ||
| 228 | raw_local_irq_restore(flags); | ||
| 229 | oops_exit(); | ||
| 230 | |||
| 231 | if (!signr) | ||
| 232 | return; | ||
| 233 | if (in_interrupt()) | ||
| 234 | panic("Fatal exception in interrupt"); | ||
| 235 | if (panic_on_oops) | ||
| 236 | panic("Fatal exception"); | ||
| 237 | do_exit(signr); | ||
| 238 | } | ||
| 239 | |||
| 240 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 241 | { | ||
| 242 | #ifdef CONFIG_X86_32 | ||
| 243 | unsigned short ss; | ||
| 244 | unsigned long sp; | ||
| 245 | #endif | ||
| 246 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
| 247 | #ifdef CONFIG_PREEMPT | ||
| 248 | printk("PREEMPT "); | ||
| 249 | #endif | ||
| 250 | #ifdef CONFIG_SMP | ||
| 251 | printk("SMP "); | ||
| 252 | #endif | ||
| 253 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 254 | printk("DEBUG_PAGEALLOC"); | ||
| 255 | #endif | ||
| 256 | printk("\n"); | ||
| 257 | sysfs_printk_last_file(); | ||
| 258 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 259 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 260 | return 1; | ||
| 261 | |||
| 262 | show_registers(regs); | ||
| 263 | #ifdef CONFIG_X86_32 | ||
| 264 | sp = (unsigned long) (&regs->sp); | ||
| 265 | savesegment(ss, ss); | ||
| 266 | if (user_mode(regs)) { | ||
| 267 | sp = regs->sp; | ||
| 268 | ss = regs->ss & 0xffff; | ||
| 269 | } | ||
| 270 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
| 271 | print_symbol("%s", regs->ip); | ||
| 272 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
| 273 | #else | ||
| 274 | /* Executive summary in case the oops scrolled away */ | ||
| 275 | printk(KERN_ALERT "RIP "); | ||
| 276 | printk_address(regs->ip, 1); | ||
| 277 | printk(" RSP <%016lx>\n", regs->sp); | ||
| 278 | #endif | ||
| 279 | return 0; | ||
| 280 | } | ||
| 281 | |||
| 282 | /* | ||
| 283 | * This is gone through when something in the kernel has done something bad | ||
| 284 | * and is about to be terminated: | ||
| 285 | */ | ||
| 286 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 287 | { | ||
| 288 | unsigned long flags = oops_begin(); | ||
| 289 | int sig = SIGSEGV; | ||
| 290 | |||
| 291 | if (!user_mode_vm(regs)) | ||
| 292 | report_bug(regs->ip, regs); | ||
| 293 | |||
| 294 | if (__die(str, regs, err)) | ||
| 295 | sig = 0; | ||
| 296 | oops_end(flags, regs, sig); | ||
| 297 | } | ||
| 298 | |||
| 299 | void notrace __kprobes | ||
| 300 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 301 | { | ||
| 302 | unsigned long flags; | ||
| 303 | |||
| 304 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 305 | return; | ||
| 306 | |||
| 307 | /* | ||
| 308 | * We are in trouble anyway, lets at least try | ||
| 309 | * to get a message out. | ||
| 310 | */ | ||
| 311 | flags = oops_begin(); | ||
| 312 | printk(KERN_EMERG "%s", str); | ||
| 313 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 314 | smp_processor_id(), regs->ip); | ||
| 315 | show_registers(regs); | ||
| 316 | oops_end(flags, regs, 0); | ||
| 317 | if (do_panic || panic_on_oops) | ||
| 318 | panic("Non maskable interrupt"); | ||
| 319 | nmi_exit(); | ||
| 320 | local_irq_enable(); | ||
| 321 | do_exit(SIGBUS); | ||
| 322 | } | ||
| 323 | |||
| 324 | static int __init oops_setup(char *s) | ||
| 325 | { | ||
| 326 | if (!s) | ||
| 327 | return -EINVAL; | ||
| 328 | if (!strcmp(s, "panic")) | ||
| 329 | panic_on_oops = 1; | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | early_param("oops", oops_setup); | ||
| 333 | |||
| 334 | static int __init kstack_setup(char *s) | ||
| 335 | { | ||
| 336 | if (!s) | ||
| 337 | return -EINVAL; | ||
| 338 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 339 | return 0; | ||
| 340 | } | ||
| 341 | early_param("kstack", kstack_setup); | ||
| 342 | |||
| 343 | static int __init code_bytes_setup(char *s) | ||
| 344 | { | ||
| 345 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 346 | if (code_bytes > 8192) | ||
| 347 | code_bytes = 8192; | ||
| 348 | |||
| 349 | return 1; | ||
| 350 | } | ||
| 351 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..da87590b8698 --- /dev/null +++ b/arch/x86/kernel/dumpstack.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | */ | ||
| 5 | |||
| 6 | #ifndef DUMPSTACK_H | ||
| 7 | #define DUMPSTACK_H | ||
| 8 | |||
| 9 | #ifdef CONFIG_X86_32 | ||
| 10 | #define STACKSLOTS_PER_LINE 8 | ||
| 11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
| 12 | #else | ||
| 13 | #define STACKSLOTS_PER_LINE 4 | ||
| 14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
| 15 | #endif | ||
| 16 | |||
| 17 | extern unsigned long | ||
| 18 | print_context_stack(struct thread_info *tinfo, | ||
| 19 | unsigned long *stack, unsigned long bp, | ||
| 20 | const struct stacktrace_ops *ops, void *data, | ||
| 21 | unsigned long *end, int *graph); | ||
| 22 | |||
| 23 | extern void | ||
| 24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 25 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
| 26 | |||
| 27 | extern void | ||
| 28 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 29 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
| 30 | |||
| 31 | extern unsigned int code_bytes; | ||
| 32 | extern int kstack_depth_to_print; | ||
| 33 | |||
| 34 | /* The form of the top of the frame on the stack */ | ||
| 35 | struct stack_frame { | ||
| 36 | struct stack_frame *next_frame; | ||
| 37 | unsigned long return_address; | ||
| 38 | }; | ||
| 39 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..d593cd1f58dc 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
| @@ -17,69 +17,14 @@ | |||
| 17 | 17 | ||
| 18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
| 19 | 19 | ||
| 20 | #define STACKSLOTS_PER_LINE 8 | 20 | #include "dumpstack.h" |
| 21 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
| 22 | |||
| 23 | int panic_on_unrecovered_nmi; | ||
| 24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
| 25 | static unsigned int code_bytes = 64; | ||
| 26 | static int die_counter; | ||
| 27 | |||
| 28 | void printk_address(unsigned long address, int reliable) | ||
| 29 | { | ||
| 30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
| 31 | reliable ? "" : "? ", (void *) address); | ||
| 32 | } | ||
| 33 | |||
| 34 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 35 | void *p, unsigned int size, void *end) | ||
| 36 | { | ||
| 37 | void *t = tinfo; | ||
| 38 | if (end) { | ||
| 39 | if (p < end && p >= (end-THREAD_SIZE)) | ||
| 40 | return 1; | ||
| 41 | else | ||
| 42 | return 0; | ||
| 43 | } | ||
| 44 | return p > t && p < t + THREAD_SIZE - size; | ||
| 45 | } | ||
| 46 | |||
| 47 | /* The form of the top of the frame on the stack */ | ||
| 48 | struct stack_frame { | ||
| 49 | struct stack_frame *next_frame; | ||
| 50 | unsigned long return_address; | ||
| 51 | }; | ||
| 52 | |||
| 53 | static inline unsigned long | ||
| 54 | print_context_stack(struct thread_info *tinfo, | ||
| 55 | unsigned long *stack, unsigned long bp, | ||
| 56 | const struct stacktrace_ops *ops, void *data, | ||
| 57 | unsigned long *end) | ||
| 58 | { | ||
| 59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 60 | |||
| 61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
| 62 | unsigned long addr; | ||
| 63 | |||
| 64 | addr = *stack; | ||
| 65 | if (__kernel_text_address(addr)) { | ||
| 66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
| 67 | ops->address(data, addr, 1); | ||
| 68 | frame = frame->next_frame; | ||
| 69 | bp = (unsigned long) frame; | ||
| 70 | } else { | ||
| 71 | ops->address(data, addr, bp == 0); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | stack++; | ||
| 75 | } | ||
| 76 | return bp; | ||
| 77 | } | ||
| 78 | 21 | ||
| 79 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
| 80 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
| 81 | const struct stacktrace_ops *ops, void *data) | 24 | const struct stacktrace_ops *ops, void *data) |
| 82 | { | 25 | { |
| 26 | int graph = 0; | ||
| 27 | |||
| 83 | if (!task) | 28 | if (!task) |
| 84 | task = current; | 29 | task = current; |
| 85 | 30 | ||
| @@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 107 | 52 | ||
| 108 | context = (struct thread_info *) | 53 | context = (struct thread_info *) |
| 109 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 54 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
| 110 | bp = print_context_stack(context, stack, bp, ops, data, NULL); | 55 | bp = print_context_stack(context, stack, bp, ops, |
| 56 | data, NULL, &graph); | ||
| 111 | 57 | ||
| 112 | stack = (unsigned long *)context->previous_esp; | 58 | stack = (unsigned long *)context->previous_esp; |
| 113 | if (!stack) | 59 | if (!stack) |
| @@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 119 | } | 65 | } |
| 120 | EXPORT_SYMBOL(dump_trace); | 66 | EXPORT_SYMBOL(dump_trace); |
| 121 | 67 | ||
| 122 | static void | 68 | void |
| 123 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 124 | { | ||
| 125 | printk(data); | ||
| 126 | print_symbol(msg, symbol); | ||
| 127 | printk("\n"); | ||
| 128 | } | ||
| 129 | |||
| 130 | static void print_trace_warning(void *data, char *msg) | ||
| 131 | { | ||
| 132 | printk("%s%s\n", (char *)data, msg); | ||
| 133 | } | ||
| 134 | |||
| 135 | static int print_trace_stack(void *data, char *name) | ||
| 136 | { | ||
| 137 | printk("%s <%s> ", (char *)data, name); | ||
| 138 | return 0; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Print one address/symbol entries per line. | ||
| 143 | */ | ||
| 144 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 145 | { | ||
| 146 | touch_nmi_watchdog(); | ||
| 147 | printk(data); | ||
| 148 | printk_address(addr, reliable); | ||
| 149 | } | ||
| 150 | |||
| 151 | static const struct stacktrace_ops print_trace_ops = { | ||
| 152 | .warning = print_trace_warning, | ||
| 153 | .warning_symbol = print_trace_warning_symbol, | ||
| 154 | .stack = print_trace_stack, | ||
| 155 | .address = print_trace_address, | ||
| 156 | }; | ||
| 157 | |||
| 158 | static void | ||
| 159 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 160 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
| 161 | { | ||
| 162 | printk("%sCall Trace:\n", log_lvl); | ||
| 163 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
| 164 | } | ||
| 165 | |||
| 166 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 167 | unsigned long *stack, unsigned long bp) | ||
| 168 | { | ||
| 169 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
| 170 | } | ||
| 171 | |||
| 172 | static void | ||
| 173 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 69 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
| 174 | unsigned long *sp, unsigned long bp, char *log_lvl) | 70 | unsigned long *sp, unsigned long bp, char *log_lvl) |
| 175 | { | 71 | { |
| @@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 196 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 92 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
| 197 | } | 93 | } |
| 198 | 94 | ||
| 199 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
| 200 | { | ||
| 201 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
| 202 | } | ||
| 203 | |||
| 204 | /* | ||
| 205 | * The architecture-independent dump_stack generator | ||
| 206 | */ | ||
| 207 | void dump_stack(void) | ||
| 208 | { | ||
| 209 | unsigned long bp = 0; | ||
| 210 | unsigned long stack; | ||
| 211 | |||
| 212 | #ifdef CONFIG_FRAME_POINTER | ||
| 213 | if (!bp) | ||
| 214 | get_bp(bp); | ||
| 215 | #endif | ||
| 216 | |||
| 217 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 218 | current->pid, current->comm, print_tainted(), | ||
| 219 | init_utsname()->release, | ||
| 220 | (int)strcspn(init_utsname()->version, " "), | ||
| 221 | init_utsname()->version); | ||
| 222 | show_trace(NULL, NULL, &stack, bp); | ||
| 223 | } | ||
| 224 | |||
| 225 | EXPORT_SYMBOL(dump_stack); | ||
| 226 | 95 | ||
| 227 | void show_registers(struct pt_regs *regs) | 96 | void show_registers(struct pt_regs *regs) |
| 228 | { | 97 | { |
| @@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
| 283 | return ud2 == 0x0b0f; | 152 | return ud2 == 0x0b0f; |
| 284 | } | 153 | } |
| 285 | 154 | ||
| 286 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 287 | static int die_owner = -1; | ||
| 288 | static unsigned int die_nest_count; | ||
| 289 | |||
| 290 | unsigned __kprobes long oops_begin(void) | ||
| 291 | { | ||
| 292 | unsigned long flags; | ||
| 293 | |||
| 294 | oops_enter(); | ||
| 295 | |||
| 296 | if (die_owner != raw_smp_processor_id()) { | ||
| 297 | console_verbose(); | ||
| 298 | raw_local_irq_save(flags); | ||
| 299 | __raw_spin_lock(&die_lock); | ||
| 300 | die_owner = smp_processor_id(); | ||
| 301 | die_nest_count = 0; | ||
| 302 | bust_spinlocks(1); | ||
| 303 | } else { | ||
| 304 | raw_local_irq_save(flags); | ||
| 305 | } | ||
| 306 | die_nest_count++; | ||
| 307 | return flags; | ||
| 308 | } | ||
| 309 | |||
| 310 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
| 311 | { | ||
| 312 | bust_spinlocks(0); | ||
| 313 | die_owner = -1; | ||
| 314 | add_taint(TAINT_DIE); | ||
| 315 | __raw_spin_unlock(&die_lock); | ||
| 316 | raw_local_irq_restore(flags); | ||
| 317 | |||
| 318 | if (!regs) | ||
| 319 | return; | ||
| 320 | |||
| 321 | if (kexec_should_crash(current)) | ||
| 322 | crash_kexec(regs); | ||
| 323 | if (in_interrupt()) | ||
| 324 | panic("Fatal exception in interrupt"); | ||
| 325 | if (panic_on_oops) | ||
| 326 | panic("Fatal exception"); | ||
| 327 | oops_exit(); | ||
| 328 | do_exit(signr); | ||
| 329 | } | ||
| 330 | |||
| 331 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 332 | { | ||
| 333 | unsigned short ss; | ||
| 334 | unsigned long sp; | ||
| 335 | |||
| 336 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
| 337 | #ifdef CONFIG_PREEMPT | ||
| 338 | printk("PREEMPT "); | ||
| 339 | #endif | ||
| 340 | #ifdef CONFIG_SMP | ||
| 341 | printk("SMP "); | ||
| 342 | #endif | ||
| 343 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 344 | printk("DEBUG_PAGEALLOC"); | ||
| 345 | #endif | ||
| 346 | printk("\n"); | ||
| 347 | sysfs_printk_last_file(); | ||
| 348 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 349 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 350 | return 1; | ||
| 351 | |||
| 352 | show_registers(regs); | ||
| 353 | /* Executive summary in case the oops scrolled away */ | ||
| 354 | sp = (unsigned long) (&regs->sp); | ||
| 355 | savesegment(ss, ss); | ||
| 356 | if (user_mode(regs)) { | ||
| 357 | sp = regs->sp; | ||
| 358 | ss = regs->ss & 0xffff; | ||
| 359 | } | ||
| 360 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
| 361 | print_symbol("%s", regs->ip); | ||
| 362 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
| 363 | return 0; | ||
| 364 | } | ||
| 365 | |||
| 366 | /* | ||
| 367 | * This is gone through when something in the kernel has done something bad | ||
| 368 | * and is about to be terminated: | ||
| 369 | */ | ||
| 370 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 371 | { | ||
| 372 | unsigned long flags = oops_begin(); | ||
| 373 | |||
| 374 | if (die_nest_count < 3) { | ||
| 375 | report_bug(regs->ip, regs); | ||
| 376 | |||
| 377 | if (__die(str, regs, err)) | ||
| 378 | regs = NULL; | ||
| 379 | } else { | ||
| 380 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
| 381 | } | ||
| 382 | |||
| 383 | oops_end(flags, regs, SIGSEGV); | ||
| 384 | } | ||
| 385 | |||
| 386 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
| 387 | |||
| 388 | void notrace __kprobes | ||
| 389 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 390 | { | ||
| 391 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 392 | return; | ||
| 393 | |||
| 394 | spin_lock(&nmi_print_lock); | ||
| 395 | /* | ||
| 396 | * We are in trouble anyway, lets at least try | ||
| 397 | * to get a message out: | ||
| 398 | */ | ||
| 399 | bust_spinlocks(1); | ||
| 400 | printk(KERN_EMERG "%s", str); | ||
| 401 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 402 | smp_processor_id(), regs->ip); | ||
| 403 | show_registers(regs); | ||
| 404 | if (do_panic) | ||
| 405 | panic("Non maskable interrupt"); | ||
| 406 | console_silent(); | ||
| 407 | spin_unlock(&nmi_print_lock); | ||
| 408 | |||
| 409 | /* | ||
| 410 | * If we are in kernel we are probably nested up pretty bad | ||
| 411 | * and might aswell get out now while we still can: | ||
| 412 | */ | ||
| 413 | if (!user_mode_vm(regs)) { | ||
| 414 | current->thread.trap_no = 2; | ||
| 415 | crash_kexec(regs); | ||
| 416 | } | ||
| 417 | |||
| 418 | bust_spinlocks(0); | ||
| 419 | do_exit(SIGSEGV); | ||
| 420 | } | ||
| 421 | |||
| 422 | static int __init oops_setup(char *s) | ||
| 423 | { | ||
| 424 | if (!s) | ||
| 425 | return -EINVAL; | ||
| 426 | if (!strcmp(s, "panic")) | ||
| 427 | panic_on_oops = 1; | ||
| 428 | return 0; | ||
| 429 | } | ||
| 430 | early_param("oops", oops_setup); | ||
| 431 | |||
| 432 | static int __init kstack_setup(char *s) | ||
| 433 | { | ||
| 434 | if (!s) | ||
| 435 | return -EINVAL; | ||
| 436 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | early_param("kstack", kstack_setup); | ||
| 440 | |||
| 441 | static int __init code_bytes_setup(char *s) | ||
| 442 | { | ||
| 443 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 444 | if (code_bytes > 8192) | ||
| 445 | code_bytes = 8192; | ||
| 446 | |||
| 447 | return 1; | ||
| 448 | } | ||
| 449 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..c302d0707048 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -17,19 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
| 19 | 19 | ||
| 20 | #define STACKSLOTS_PER_LINE 4 | 20 | #include "dumpstack.h" |
| 21 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
| 22 | |||
| 23 | int panic_on_unrecovered_nmi; | ||
| 24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
| 25 | static unsigned int code_bytes = 64; | ||
| 26 | static int die_counter; | ||
| 27 | |||
| 28 | void printk_address(unsigned long address, int reliable) | ||
| 29 | { | ||
| 30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
| 31 | reliable ? "" : "? ", (void *) address); | ||
| 32 | } | ||
| 33 | 21 | ||
| 34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 22 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
| 35 | unsigned *usedp, char **idp) | 23 | unsigned *usedp, char **idp) |
| @@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
| 113 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 101 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
| 114 | */ | 102 | */ |
| 115 | 103 | ||
| 116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 117 | void *p, unsigned int size, void *end) | ||
| 118 | { | ||
| 119 | void *t = tinfo; | ||
| 120 | if (end) { | ||
| 121 | if (p < end && p >= (end-THREAD_SIZE)) | ||
| 122 | return 1; | ||
| 123 | else | ||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | return p > t && p < t + THREAD_SIZE - size; | ||
| 127 | } | ||
| 128 | |||
| 129 | /* The form of the top of the frame on the stack */ | ||
| 130 | struct stack_frame { | ||
| 131 | struct stack_frame *next_frame; | ||
| 132 | unsigned long return_address; | ||
| 133 | }; | ||
| 134 | |||
| 135 | static inline unsigned long | ||
| 136 | print_context_stack(struct thread_info *tinfo, | ||
| 137 | unsigned long *stack, unsigned long bp, | ||
| 138 | const struct stacktrace_ops *ops, void *data, | ||
| 139 | unsigned long *end) | ||
| 140 | { | ||
| 141 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 142 | |||
| 143 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
| 144 | unsigned long addr; | ||
| 145 | |||
| 146 | addr = *stack; | ||
| 147 | if (__kernel_text_address(addr)) { | ||
| 148 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
| 149 | ops->address(data, addr, 1); | ||
| 150 | frame = frame->next_frame; | ||
| 151 | bp = (unsigned long) frame; | ||
| 152 | } else { | ||
| 153 | ops->address(data, addr, bp == 0); | ||
| 154 | } | ||
| 155 | } | ||
| 156 | stack++; | ||
| 157 | } | ||
| 158 | return bp; | ||
| 159 | } | ||
| 160 | |||
| 161 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 104 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
| 162 | unsigned long *stack, unsigned long bp, | 105 | unsigned long *stack, unsigned long bp, |
| 163 | const struct stacktrace_ops *ops, void *data) | 106 | const struct stacktrace_ops *ops, void *data) |
| @@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 166 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 109 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
| 167 | unsigned used = 0; | 110 | unsigned used = 0; |
| 168 | struct thread_info *tinfo; | 111 | struct thread_info *tinfo; |
| 112 | int graph = 0; | ||
| 169 | 113 | ||
| 170 | if (!task) | 114 | if (!task) |
| 171 | task = current; | 115 | task = current; |
| @@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 206 | break; | 150 | break; |
| 207 | 151 | ||
| 208 | bp = print_context_stack(tinfo, stack, bp, ops, | 152 | bp = print_context_stack(tinfo, stack, bp, ops, |
| 209 | data, estack_end); | 153 | data, estack_end, &graph); |
| 210 | ops->stack(data, "<EOE>"); | 154 | ops->stack(data, "<EOE>"); |
| 211 | /* | 155 | /* |
| 212 | * We link to the next stack via the | 156 | * We link to the next stack via the |
| @@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 225 | if (ops->stack(data, "IRQ") < 0) | 169 | if (ops->stack(data, "IRQ") < 0) |
| 226 | break; | 170 | break; |
| 227 | bp = print_context_stack(tinfo, stack, bp, | 171 | bp = print_context_stack(tinfo, stack, bp, |
| 228 | ops, data, irqstack_end); | 172 | ops, data, irqstack_end, &graph); |
| 229 | /* | 173 | /* |
| 230 | * We link to the next stack (which would be | 174 | * We link to the next stack (which would be |
| 231 | * the process stack normally) the last | 175 | * the process stack normally) the last |
| @@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 243 | /* | 187 | /* |
| 244 | * This handles the process stack: | 188 | * This handles the process stack: |
| 245 | */ | 189 | */ |
| 246 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | 190 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); |
| 247 | put_cpu(); | 191 | put_cpu(); |
| 248 | } | 192 | } |
| 249 | EXPORT_SYMBOL(dump_trace); | 193 | EXPORT_SYMBOL(dump_trace); |
| 250 | 194 | ||
| 251 | static void | 195 | void |
| 252 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 253 | { | ||
| 254 | printk(data); | ||
| 255 | print_symbol(msg, symbol); | ||
| 256 | printk("\n"); | ||
| 257 | } | ||
| 258 | |||
| 259 | static void print_trace_warning(void *data, char *msg) | ||
| 260 | { | ||
| 261 | printk("%s%s\n", (char *)data, msg); | ||
| 262 | } | ||
| 263 | |||
| 264 | static int print_trace_stack(void *data, char *name) | ||
| 265 | { | ||
| 266 | printk("%s <%s> ", (char *)data, name); | ||
| 267 | return 0; | ||
| 268 | } | ||
| 269 | |||
| 270 | /* | ||
| 271 | * Print one address/symbol entries per line. | ||
| 272 | */ | ||
| 273 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 274 | { | ||
| 275 | touch_nmi_watchdog(); | ||
| 276 | printk(data); | ||
| 277 | printk_address(addr, reliable); | ||
| 278 | } | ||
| 279 | |||
| 280 | static const struct stacktrace_ops print_trace_ops = { | ||
| 281 | .warning = print_trace_warning, | ||
| 282 | .warning_symbol = print_trace_warning_symbol, | ||
| 283 | .stack = print_trace_stack, | ||
| 284 | .address = print_trace_address, | ||
| 285 | }; | ||
| 286 | |||
| 287 | static void | ||
| 288 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 289 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
| 290 | { | ||
| 291 | printk("%sCall Trace:\n", log_lvl); | ||
| 292 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
| 293 | } | ||
| 294 | |||
| 295 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 296 | unsigned long *stack, unsigned long bp) | ||
| 297 | { | ||
| 298 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
| 299 | } | ||
| 300 | |||
| 301 | static void | ||
| 302 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 196 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
| 303 | unsigned long *sp, unsigned long bp, char *log_lvl) | 197 | unsigned long *sp, unsigned long bp, char *log_lvl) |
| 304 | { | 198 | { |
| @@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 342 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 236 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
| 343 | } | 237 | } |
| 344 | 238 | ||
| 345 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
| 346 | { | ||
| 347 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
| 348 | } | ||
| 349 | |||
| 350 | /* | ||
| 351 | * The architecture-independent dump_stack generator | ||
| 352 | */ | ||
| 353 | void dump_stack(void) | ||
| 354 | { | ||
| 355 | unsigned long bp = 0; | ||
| 356 | unsigned long stack; | ||
| 357 | |||
| 358 | #ifdef CONFIG_FRAME_POINTER | ||
| 359 | if (!bp) | ||
| 360 | get_bp(bp); | ||
| 361 | #endif | ||
| 362 | |||
| 363 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 364 | current->pid, current->comm, print_tainted(), | ||
| 365 | init_utsname()->release, | ||
| 366 | (int)strcspn(init_utsname()->version, " "), | ||
| 367 | init_utsname()->version); | ||
| 368 | show_trace(NULL, NULL, &stack, bp); | ||
| 369 | } | ||
| 370 | EXPORT_SYMBOL(dump_stack); | ||
| 371 | |||
| 372 | void show_registers(struct pt_regs *regs) | 239 | void show_registers(struct pt_regs *regs) |
| 373 | { | 240 | { |
| 374 | int i; | 241 | int i; |
| @@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
| 429 | return ud2 == 0x0b0f; | 296 | return ud2 == 0x0b0f; |
| 430 | } | 297 | } |
| 431 | 298 | ||
| 432 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 433 | static int die_owner = -1; | ||
| 434 | static unsigned int die_nest_count; | ||
| 435 | |||
| 436 | unsigned __kprobes long oops_begin(void) | ||
| 437 | { | ||
| 438 | int cpu; | ||
| 439 | unsigned long flags; | ||
| 440 | |||
| 441 | oops_enter(); | ||
| 442 | |||
| 443 | /* racy, but better than risking deadlock. */ | ||
| 444 | raw_local_irq_save(flags); | ||
| 445 | cpu = smp_processor_id(); | ||
| 446 | if (!__raw_spin_trylock(&die_lock)) { | ||
| 447 | if (cpu == die_owner) | ||
| 448 | /* nested oops. should stop eventually */; | ||
| 449 | else | ||
| 450 | __raw_spin_lock(&die_lock); | ||
| 451 | } | ||
| 452 | die_nest_count++; | ||
| 453 | die_owner = cpu; | ||
| 454 | console_verbose(); | ||
| 455 | bust_spinlocks(1); | ||
| 456 | return flags; | ||
| 457 | } | ||
| 458 | |||
| 459 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
| 460 | { | ||
| 461 | die_owner = -1; | ||
| 462 | bust_spinlocks(0); | ||
| 463 | die_nest_count--; | ||
| 464 | if (!die_nest_count) | ||
| 465 | /* Nest count reaches zero, release the lock. */ | ||
| 466 | __raw_spin_unlock(&die_lock); | ||
| 467 | raw_local_irq_restore(flags); | ||
| 468 | if (!regs) { | ||
| 469 | oops_exit(); | ||
| 470 | return; | ||
| 471 | } | ||
| 472 | if (in_interrupt()) | ||
| 473 | panic("Fatal exception in interrupt"); | ||
| 474 | if (panic_on_oops) | ||
| 475 | panic("Fatal exception"); | ||
| 476 | oops_exit(); | ||
| 477 | do_exit(signr); | ||
| 478 | } | ||
| 479 | |||
| 480 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 481 | { | ||
| 482 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
| 483 | #ifdef CONFIG_PREEMPT | ||
| 484 | printk("PREEMPT "); | ||
| 485 | #endif | ||
| 486 | #ifdef CONFIG_SMP | ||
| 487 | printk("SMP "); | ||
| 488 | #endif | ||
| 489 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 490 | printk("DEBUG_PAGEALLOC"); | ||
| 491 | #endif | ||
| 492 | printk("\n"); | ||
| 493 | sysfs_printk_last_file(); | ||
| 494 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 495 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 496 | return 1; | ||
| 497 | |||
| 498 | show_registers(regs); | ||
| 499 | add_taint(TAINT_DIE); | ||
| 500 | /* Executive summary in case the oops scrolled away */ | ||
| 501 | printk(KERN_ALERT "RIP "); | ||
| 502 | printk_address(regs->ip, 1); | ||
| 503 | printk(" RSP <%016lx>\n", regs->sp); | ||
| 504 | if (kexec_should_crash(current)) | ||
| 505 | crash_kexec(regs); | ||
| 506 | return 0; | ||
| 507 | } | ||
| 508 | |||
| 509 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 510 | { | ||
| 511 | unsigned long flags = oops_begin(); | ||
| 512 | |||
| 513 | if (!user_mode(regs)) | ||
| 514 | report_bug(regs->ip, regs); | ||
| 515 | |||
| 516 | if (__die(str, regs, err)) | ||
| 517 | regs = NULL; | ||
| 518 | oops_end(flags, regs, SIGSEGV); | ||
| 519 | } | ||
| 520 | |||
| 521 | notrace __kprobes void | ||
| 522 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 523 | { | ||
| 524 | unsigned long flags; | ||
| 525 | |||
| 526 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 527 | return; | ||
| 528 | |||
| 529 | flags = oops_begin(); | ||
| 530 | /* | ||
| 531 | * We are in trouble anyway, lets at least try | ||
| 532 | * to get a message out. | ||
| 533 | */ | ||
| 534 | printk(KERN_EMERG "%s", str); | ||
| 535 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 536 | smp_processor_id(), regs->ip); | ||
| 537 | show_registers(regs); | ||
| 538 | if (kexec_should_crash(current)) | ||
| 539 | crash_kexec(regs); | ||
| 540 | if (do_panic || panic_on_oops) | ||
| 541 | panic("Non maskable interrupt"); | ||
| 542 | oops_end(flags, NULL, SIGBUS); | ||
| 543 | nmi_exit(); | ||
| 544 | local_irq_enable(); | ||
| 545 | do_exit(SIGBUS); | ||
| 546 | } | ||
| 547 | |||
| 548 | static int __init oops_setup(char *s) | ||
| 549 | { | ||
| 550 | if (!s) | ||
| 551 | return -EINVAL; | ||
| 552 | if (!strcmp(s, "panic")) | ||
| 553 | panic_on_oops = 1; | ||
| 554 | return 0; | ||
| 555 | } | ||
| 556 | early_param("oops", oops_setup); | ||
| 557 | |||
| 558 | static int __init kstack_setup(char *s) | ||
| 559 | { | ||
| 560 | if (!s) | ||
| 561 | return -EINVAL; | ||
| 562 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 563 | return 0; | ||
| 564 | } | ||
| 565 | early_param("kstack", kstack_setup); | ||
| 566 | |||
| 567 | static int __init code_bytes_setup(char *s) | ||
| 568 | { | ||
| 569 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 570 | if (code_bytes > 8192) | ||
| 571 | code_bytes = 8192; | ||
| 572 | |||
| 573 | return 1; | ||
| 574 | } | ||
| 575 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7aafeb5263ef..65a13943e098 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
| @@ -677,22 +677,6 @@ struct early_res { | |||
| 677 | }; | 677 | }; |
| 678 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | 678 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { |
| 679 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | 679 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ |
| 680 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | ||
| 681 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
| 682 | #endif | ||
| 683 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | ||
| 684 | /* | ||
| 685 | * But first pinch a few for the stack/trampoline stuff | ||
| 686 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
| 687 | * trampoline before removing it. (see the GDT stuff) | ||
| 688 | */ | ||
| 689 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | ||
| 690 | /* | ||
| 691 | * Has to be in very low memory so we can execute | ||
| 692 | * real-mode AP code. | ||
| 693 | */ | ||
| 694 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | ||
| 695 | #endif | ||
| 696 | {} | 680 | {} |
| 697 | }; | 681 | }; |
| 698 | 682 | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1b894b72c0f5..744aa7fc49d5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
| 18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
| 19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
| 20 | #include <asm/gart.h> | ||
| 20 | 21 | ||
| 21 | static void __init fix_hypertransport_config(int num, int slot, int func) | 22 | static void __init fix_hypertransport_config(int num, int slot, int func) |
| 22 | { | 23 | { |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 34ad997d3834..23b138e31e9c 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
| @@ -875,49 +875,6 @@ static struct console early_dbgp_console = { | |||
| 875 | }; | 875 | }; |
| 876 | #endif | 876 | #endif |
| 877 | 877 | ||
| 878 | /* Console interface to a host file on AMD's SimNow! */ | ||
| 879 | |||
| 880 | static int simnow_fd; | ||
| 881 | |||
| 882 | enum { | ||
| 883 | MAGIC1 = 0xBACCD00A, | ||
| 884 | MAGIC2 = 0xCA110000, | ||
| 885 | XOPEN = 5, | ||
| 886 | XWRITE = 4, | ||
| 887 | }; | ||
| 888 | |||
| 889 | static noinline long simnow(long cmd, long a, long b, long c) | ||
| 890 | { | ||
| 891 | long ret; | ||
| 892 | |||
| 893 | asm volatile("cpuid" : | ||
| 894 | "=a" (ret) : | ||
| 895 | "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); | ||
| 896 | return ret; | ||
| 897 | } | ||
| 898 | |||
| 899 | static void __init simnow_init(char *str) | ||
| 900 | { | ||
| 901 | char *fn = "klog"; | ||
| 902 | |||
| 903 | if (*str == '=') | ||
| 904 | fn = ++str; | ||
| 905 | /* error ignored */ | ||
| 906 | simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); | ||
| 907 | } | ||
| 908 | |||
| 909 | static void simnow_write(struct console *con, const char *s, unsigned n) | ||
| 910 | { | ||
| 911 | simnow(XWRITE, simnow_fd, (unsigned long)s, n); | ||
| 912 | } | ||
| 913 | |||
| 914 | static struct console simnow_console = { | ||
| 915 | .name = "simnow", | ||
| 916 | .write = simnow_write, | ||
| 917 | .flags = CON_PRINTBUFFER, | ||
| 918 | .index = -1, | ||
| 919 | }; | ||
| 920 | |||
| 921 | /* Direct interface for emergencies */ | 878 | /* Direct interface for emergencies */ |
| 922 | static struct console *early_console = &early_vga_console; | 879 | static struct console *early_console = &early_vga_console; |
| 923 | static int __initdata early_console_initialized; | 880 | static int __initdata early_console_initialized; |
| @@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf) | |||
| 960 | max_ypos = boot_params.screen_info.orig_video_lines; | 917 | max_ypos = boot_params.screen_info.orig_video_lines; |
| 961 | current_ypos = boot_params.screen_info.orig_y; | 918 | current_ypos = boot_params.screen_info.orig_y; |
| 962 | early_console = &early_vga_console; | 919 | early_console = &early_vga_console; |
| 963 | } else if (!strncmp(buf, "simnow", 6)) { | ||
| 964 | simnow_init(buf + 6); | ||
| 965 | early_console = &simnow_console; | ||
| 966 | keep_early = 1; | ||
| 967 | #ifdef CONFIG_EARLY_PRINTK_DBGP | 920 | #ifdef CONFIG_EARLY_PRINTK_DBGP |
| 968 | } else if (!strncmp(buf, "dbgp", 4)) { | 921 | } else if (!strncmp(buf, "dbgp", 4)) { |
| 969 | if (early_dbgp_init(buf+4) < 0) | 922 | if (early_dbgp_init(buf+4) < 0) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..d6f0490a7391 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -619,28 +619,37 @@ END(syscall_badsys) | |||
| 619 | 27:; | 619 | 27:; |
| 620 | 620 | ||
| 621 | /* | 621 | /* |
| 622 | * Build the entry stubs and pointer table with | 622 | * Build the entry stubs and pointer table with some assembler magic. |
| 623 | * some assembler magic. | 623 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
| 624 | * single cache line on all modern x86 implementations. | ||
| 624 | */ | 625 | */ |
| 625 | .section .rodata,"a" | 626 | .section .init.rodata,"a" |
| 626 | ENTRY(interrupt) | 627 | ENTRY(interrupt) |
| 627 | .text | 628 | .text |
| 628 | 629 | .p2align 5 | |
| 630 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
| 629 | ENTRY(irq_entries_start) | 631 | ENTRY(irq_entries_start) |
| 630 | RING0_INT_FRAME | 632 | RING0_INT_FRAME |
| 631 | vector=0 | 633 | vector=FIRST_EXTERNAL_VECTOR |
| 632 | .rept NR_VECTORS | 634 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
| 633 | ALIGN | 635 | .balign 32 |
| 634 | .if vector | 636 | .rept 7 |
| 637 | .if vector < NR_VECTORS | ||
| 638 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
| 635 | CFI_ADJUST_CFA_OFFSET -4 | 639 | CFI_ADJUST_CFA_OFFSET -4 |
| 636 | .endif | 640 | .endif |
| 637 | 1: pushl $~(vector) | 641 | 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ |
| 638 | CFI_ADJUST_CFA_OFFSET 4 | 642 | CFI_ADJUST_CFA_OFFSET 4 |
| 639 | jmp common_interrupt | 643 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
| 640 | .previous | 644 | jmp 2f |
| 645 | .endif | ||
| 646 | .previous | ||
| 641 | .long 1b | 647 | .long 1b |
| 642 | .text | 648 | .text |
| 643 | vector=vector+1 | 649 | vector=vector+1 |
| 650 | .endif | ||
| 651 | .endr | ||
| 652 | 2: jmp common_interrupt | ||
| 644 | .endr | 653 | .endr |
| 645 | END(irq_entries_start) | 654 | END(irq_entries_start) |
| 646 | 655 | ||
| @@ -652,8 +661,9 @@ END(interrupt) | |||
| 652 | * the CPU automatically disables interrupts when executing an IRQ vector, | 661 | * the CPU automatically disables interrupts when executing an IRQ vector, |
| 653 | * so IRQ-flags tracing has to follow that: | 662 | * so IRQ-flags tracing has to follow that: |
| 654 | */ | 663 | */ |
| 655 | ALIGN | 664 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
| 656 | common_interrupt: | 665 | common_interrupt: |
| 666 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | ||
| 657 | SAVE_ALL | 667 | SAVE_ALL |
| 658 | TRACE_IRQS_OFF | 668 | TRACE_IRQS_OFF |
| 659 | movl %esp,%eax | 669 | movl %esp,%eax |
| @@ -678,65 +688,6 @@ ENDPROC(name) | |||
| 678 | /* The include is where all of the SMP etc. interrupts come from */ | 688 | /* The include is where all of the SMP etc. interrupts come from */ |
| 679 | #include "entry_arch.h" | 689 | #include "entry_arch.h" |
| 680 | 690 | ||
| 681 | KPROBE_ENTRY(page_fault) | ||
| 682 | RING0_EC_FRAME | ||
| 683 | pushl $do_page_fault | ||
| 684 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 685 | ALIGN | ||
| 686 | error_code: | ||
| 687 | /* the function address is in %fs's slot on the stack */ | ||
| 688 | pushl %es | ||
| 689 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 690 | /*CFI_REL_OFFSET es, 0*/ | ||
| 691 | pushl %ds | ||
| 692 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 693 | /*CFI_REL_OFFSET ds, 0*/ | ||
| 694 | pushl %eax | ||
| 695 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 696 | CFI_REL_OFFSET eax, 0 | ||
| 697 | pushl %ebp | ||
| 698 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 699 | CFI_REL_OFFSET ebp, 0 | ||
| 700 | pushl %edi | ||
| 701 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 702 | CFI_REL_OFFSET edi, 0 | ||
| 703 | pushl %esi | ||
| 704 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 705 | CFI_REL_OFFSET esi, 0 | ||
| 706 | pushl %edx | ||
| 707 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 708 | CFI_REL_OFFSET edx, 0 | ||
| 709 | pushl %ecx | ||
| 710 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 711 | CFI_REL_OFFSET ecx, 0 | ||
| 712 | pushl %ebx | ||
| 713 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 714 | CFI_REL_OFFSET ebx, 0 | ||
| 715 | cld | ||
| 716 | pushl %fs | ||
| 717 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 718 | /*CFI_REL_OFFSET fs, 0*/ | ||
| 719 | movl $(__KERNEL_PERCPU), %ecx | ||
| 720 | movl %ecx, %fs | ||
| 721 | UNWIND_ESPFIX_STACK | ||
| 722 | popl %ecx | ||
| 723 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 724 | /*CFI_REGISTER es, ecx*/ | ||
| 725 | movl PT_FS(%esp), %edi # get the function address | ||
| 726 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
| 727 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
| 728 | mov %ecx, PT_FS(%esp) | ||
| 729 | /*CFI_REL_OFFSET fs, ES*/ | ||
| 730 | movl $(__USER_DS), %ecx | ||
| 731 | movl %ecx, %ds | ||
| 732 | movl %ecx, %es | ||
| 733 | TRACE_IRQS_OFF | ||
| 734 | movl %esp,%eax # pt_regs pointer | ||
| 735 | call *%edi | ||
| 736 | jmp ret_from_exception | ||
| 737 | CFI_ENDPROC | ||
| 738 | KPROBE_END(page_fault) | ||
| 739 | |||
| 740 | ENTRY(coprocessor_error) | 691 | ENTRY(coprocessor_error) |
| 741 | RING0_INT_FRAME | 692 | RING0_INT_FRAME |
| 742 | pushl $0 | 693 | pushl $0 |
| @@ -767,140 +718,6 @@ ENTRY(device_not_available) | |||
| 767 | CFI_ENDPROC | 718 | CFI_ENDPROC |
| 768 | END(device_not_available) | 719 | END(device_not_available) |
| 769 | 720 | ||
| 770 | /* | ||
| 771 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
| 772 | * that sets up the real kernel stack. Check here, since we can't | ||
| 773 | * allow the wrong stack to be used. | ||
| 774 | * | ||
| 775 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
| 776 | * already pushed 3 words if it hits on the sysenter instruction: | ||
| 777 | * eflags, cs and eip. | ||
| 778 | * | ||
| 779 | * We just load the right stack, and push the three (known) values | ||
| 780 | * by hand onto the new stack - while updating the return eip past | ||
| 781 | * the instruction that would have done it for sysenter. | ||
| 782 | */ | ||
| 783 | #define FIX_STACK(offset, ok, label) \ | ||
| 784 | cmpw $__KERNEL_CS,4(%esp); \ | ||
| 785 | jne ok; \ | ||
| 786 | label: \ | ||
| 787 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
| 788 | CFI_DEF_CFA esp, 0; \ | ||
| 789 | CFI_UNDEFINED eip; \ | ||
| 790 | pushfl; \ | ||
| 791 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 792 | pushl $__KERNEL_CS; \ | ||
| 793 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 794 | pushl $sysenter_past_esp; \ | ||
| 795 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 796 | CFI_REL_OFFSET eip, 0 | ||
| 797 | |||
| 798 | KPROBE_ENTRY(debug) | ||
| 799 | RING0_INT_FRAME | ||
| 800 | cmpl $ia32_sysenter_target,(%esp) | ||
| 801 | jne debug_stack_correct | ||
| 802 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
| 803 | debug_stack_correct: | ||
| 804 | pushl $-1 # mark this as an int | ||
| 805 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 806 | SAVE_ALL | ||
| 807 | TRACE_IRQS_OFF | ||
| 808 | xorl %edx,%edx # error code 0 | ||
| 809 | movl %esp,%eax # pt_regs pointer | ||
| 810 | call do_debug | ||
| 811 | jmp ret_from_exception | ||
| 812 | CFI_ENDPROC | ||
| 813 | KPROBE_END(debug) | ||
| 814 | |||
| 815 | /* | ||
| 816 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
| 817 | * a debug fault, and the debug fault hasn't yet been able to | ||
| 818 | * clear up the stack. So we first check whether we got an | ||
| 819 | * NMI on the sysenter entry path, but after that we need to | ||
| 820 | * check whether we got an NMI on the debug path where the debug | ||
| 821 | * fault happened on the sysenter path. | ||
| 822 | */ | ||
| 823 | KPROBE_ENTRY(nmi) | ||
| 824 | RING0_INT_FRAME | ||
| 825 | pushl %eax | ||
| 826 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 827 | movl %ss, %eax | ||
| 828 | cmpw $__ESPFIX_SS, %ax | ||
| 829 | popl %eax | ||
| 830 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 831 | je nmi_espfix_stack | ||
| 832 | cmpl $ia32_sysenter_target,(%esp) | ||
| 833 | je nmi_stack_fixup | ||
| 834 | pushl %eax | ||
| 835 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 836 | movl %esp,%eax | ||
| 837 | /* Do not access memory above the end of our stack page, | ||
| 838 | * it might not exist. | ||
| 839 | */ | ||
| 840 | andl $(THREAD_SIZE-1),%eax | ||
| 841 | cmpl $(THREAD_SIZE-20),%eax | ||
| 842 | popl %eax | ||
| 843 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 844 | jae nmi_stack_correct | ||
| 845 | cmpl $ia32_sysenter_target,12(%esp) | ||
| 846 | je nmi_debug_stack_check | ||
| 847 | nmi_stack_correct: | ||
| 848 | /* We have a RING0_INT_FRAME here */ | ||
| 849 | pushl %eax | ||
| 850 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 851 | SAVE_ALL | ||
| 852 | TRACE_IRQS_OFF | ||
| 853 | xorl %edx,%edx # zero error code | ||
| 854 | movl %esp,%eax # pt_regs pointer | ||
| 855 | call do_nmi | ||
| 856 | jmp restore_nocheck_notrace | ||
| 857 | CFI_ENDPROC | ||
| 858 | |||
| 859 | nmi_stack_fixup: | ||
| 860 | RING0_INT_FRAME | ||
| 861 | FIX_STACK(12,nmi_stack_correct, 1) | ||
| 862 | jmp nmi_stack_correct | ||
| 863 | |||
| 864 | nmi_debug_stack_check: | ||
| 865 | /* We have a RING0_INT_FRAME here */ | ||
| 866 | cmpw $__KERNEL_CS,16(%esp) | ||
| 867 | jne nmi_stack_correct | ||
| 868 | cmpl $debug,(%esp) | ||
| 869 | jb nmi_stack_correct | ||
| 870 | cmpl $debug_esp_fix_insn,(%esp) | ||
| 871 | ja nmi_stack_correct | ||
| 872 | FIX_STACK(24,nmi_stack_correct, 1) | ||
| 873 | jmp nmi_stack_correct | ||
| 874 | |||
| 875 | nmi_espfix_stack: | ||
| 876 | /* We have a RING0_INT_FRAME here. | ||
| 877 | * | ||
| 878 | * create the pointer to lss back | ||
| 879 | */ | ||
| 880 | pushl %ss | ||
| 881 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 882 | pushl %esp | ||
| 883 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 884 | addw $4, (%esp) | ||
| 885 | /* copy the iret frame of 12 bytes */ | ||
| 886 | .rept 3 | ||
| 887 | pushl 16(%esp) | ||
| 888 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 889 | .endr | ||
| 890 | pushl %eax | ||
| 891 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 892 | SAVE_ALL | ||
| 893 | TRACE_IRQS_OFF | ||
| 894 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
| 895 | xorl %edx,%edx # zero error code | ||
| 896 | call do_nmi | ||
| 897 | RESTORE_REGS | ||
| 898 | lss 12+4(%esp), %esp # back to espfix stack | ||
| 899 | CFI_ADJUST_CFA_OFFSET -24 | ||
| 900 | jmp irq_return | ||
| 901 | CFI_ENDPROC | ||
| 902 | KPROBE_END(nmi) | ||
| 903 | |||
| 904 | #ifdef CONFIG_PARAVIRT | 721 | #ifdef CONFIG_PARAVIRT |
| 905 | ENTRY(native_iret) | 722 | ENTRY(native_iret) |
| 906 | iret | 723 | iret |
| @@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit) | |||
| 916 | END(native_irq_enable_sysexit) | 733 | END(native_irq_enable_sysexit) |
| 917 | #endif | 734 | #endif |
| 918 | 735 | ||
| 919 | KPROBE_ENTRY(int3) | ||
| 920 | RING0_INT_FRAME | ||
| 921 | pushl $-1 # mark this as an int | ||
| 922 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 923 | SAVE_ALL | ||
| 924 | TRACE_IRQS_OFF | ||
| 925 | xorl %edx,%edx # zero error code | ||
| 926 | movl %esp,%eax # pt_regs pointer | ||
| 927 | call do_int3 | ||
| 928 | jmp ret_from_exception | ||
| 929 | CFI_ENDPROC | ||
| 930 | KPROBE_END(int3) | ||
| 931 | |||
| 932 | ENTRY(overflow) | 736 | ENTRY(overflow) |
| 933 | RING0_INT_FRAME | 737 | RING0_INT_FRAME |
| 934 | pushl $0 | 738 | pushl $0 |
| @@ -993,14 +797,6 @@ ENTRY(stack_segment) | |||
| 993 | CFI_ENDPROC | 797 | CFI_ENDPROC |
| 994 | END(stack_segment) | 798 | END(stack_segment) |
| 995 | 799 | ||
| 996 | KPROBE_ENTRY(general_protection) | ||
| 997 | RING0_EC_FRAME | ||
| 998 | pushl $do_general_protection | ||
| 999 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1000 | jmp error_code | ||
| 1001 | CFI_ENDPROC | ||
| 1002 | KPROBE_END(general_protection) | ||
| 1003 | |||
| 1004 | ENTRY(alignment_check) | 800 | ENTRY(alignment_check) |
| 1005 | RING0_EC_FRAME | 801 | RING0_EC_FRAME |
| 1006 | pushl $do_alignment_check | 802 | pushl $do_alignment_check |
| @@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper) | |||
| 1051 | push %eax | 847 | push %eax |
| 1052 | CFI_ADJUST_CFA_OFFSET 4 | 848 | CFI_ADJUST_CFA_OFFSET 4 |
| 1053 | call do_exit | 849 | call do_exit |
| 850 | ud2 # padding for call trace | ||
| 1054 | CFI_ENDPROC | 851 | CFI_ENDPROC |
| 1055 | ENDPROC(kernel_thread_helper) | 852 | ENDPROC(kernel_thread_helper) |
| 1056 | 853 | ||
| @@ -1157,6 +954,9 @@ ENTRY(mcount) | |||
| 1157 | END(mcount) | 954 | END(mcount) |
| 1158 | 955 | ||
| 1159 | ENTRY(ftrace_caller) | 956 | ENTRY(ftrace_caller) |
| 957 | cmpl $0, function_trace_stop | ||
| 958 | jne ftrace_stub | ||
| 959 | |||
| 1160 | pushl %eax | 960 | pushl %eax |
| 1161 | pushl %ecx | 961 | pushl %ecx |
| 1162 | pushl %edx | 962 | pushl %edx |
| @@ -1171,6 +971,11 @@ ftrace_call: | |||
| 1171 | popl %edx | 971 | popl %edx |
| 1172 | popl %ecx | 972 | popl %ecx |
| 1173 | popl %eax | 973 | popl %eax |
| 974 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 975 | .globl ftrace_graph_call | ||
| 976 | ftrace_graph_call: | ||
| 977 | jmp ftrace_stub | ||
| 978 | #endif | ||
| 1174 | 979 | ||
| 1175 | .globl ftrace_stub | 980 | .globl ftrace_stub |
| 1176 | ftrace_stub: | 981 | ftrace_stub: |
| @@ -1180,8 +985,18 @@ END(ftrace_caller) | |||
| 1180 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 985 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
| 1181 | 986 | ||
| 1182 | ENTRY(mcount) | 987 | ENTRY(mcount) |
| 988 | cmpl $0, function_trace_stop | ||
| 989 | jne ftrace_stub | ||
| 990 | |||
| 1183 | cmpl $ftrace_stub, ftrace_trace_function | 991 | cmpl $ftrace_stub, ftrace_trace_function |
| 1184 | jnz trace | 992 | jnz trace |
| 993 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 994 | cmpl $ftrace_stub, ftrace_graph_return | ||
| 995 | jnz ftrace_graph_caller | ||
| 996 | |||
| 997 | cmpl $ftrace_graph_entry_stub, ftrace_graph_entry | ||
| 998 | jnz ftrace_graph_caller | ||
| 999 | #endif | ||
| 1185 | .globl ftrace_stub | 1000 | .globl ftrace_stub |
| 1186 | ftrace_stub: | 1001 | ftrace_stub: |
| 1187 | ret | 1002 | ret |
| @@ -1200,13 +1015,268 @@ trace: | |||
| 1200 | popl %edx | 1015 | popl %edx |
| 1201 | popl %ecx | 1016 | popl %ecx |
| 1202 | popl %eax | 1017 | popl %eax |
| 1203 | |||
| 1204 | jmp ftrace_stub | 1018 | jmp ftrace_stub |
| 1205 | END(mcount) | 1019 | END(mcount) |
| 1206 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1020 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
| 1207 | #endif /* CONFIG_FUNCTION_TRACER */ | 1021 | #endif /* CONFIG_FUNCTION_TRACER */ |
| 1208 | 1022 | ||
| 1023 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 1024 | ENTRY(ftrace_graph_caller) | ||
| 1025 | cmpl $0, function_trace_stop | ||
| 1026 | jne ftrace_stub | ||
| 1027 | |||
| 1028 | pushl %eax | ||
| 1029 | pushl %ecx | ||
| 1030 | pushl %edx | ||
| 1031 | movl 0xc(%esp), %edx | ||
| 1032 | lea 0x4(%ebp), %eax | ||
| 1033 | subl $MCOUNT_INSN_SIZE, %edx | ||
| 1034 | call prepare_ftrace_return | ||
| 1035 | popl %edx | ||
| 1036 | popl %ecx | ||
| 1037 | popl %eax | ||
| 1038 | ret | ||
| 1039 | END(ftrace_graph_caller) | ||
| 1040 | |||
| 1041 | .globl return_to_handler | ||
| 1042 | return_to_handler: | ||
| 1043 | pushl $0 | ||
| 1044 | pushl %eax | ||
| 1045 | pushl %ecx | ||
| 1046 | pushl %edx | ||
| 1047 | call ftrace_return_to_handler | ||
| 1048 | movl %eax, 0xc(%esp) | ||
| 1049 | popl %edx | ||
| 1050 | popl %ecx | ||
| 1051 | popl %eax | ||
| 1052 | ret | ||
| 1053 | #endif | ||
| 1054 | |||
| 1209 | .section .rodata,"a" | 1055 | .section .rodata,"a" |
| 1210 | #include "syscall_table_32.S" | 1056 | #include "syscall_table_32.S" |
| 1211 | 1057 | ||
| 1212 | syscall_table_size=(.-sys_call_table) | 1058 | syscall_table_size=(.-sys_call_table) |
| 1059 | |||
| 1060 | /* | ||
| 1061 | * Some functions should be protected against kprobes | ||
| 1062 | */ | ||
| 1063 | .pushsection .kprobes.text, "ax" | ||
| 1064 | |||
| 1065 | ENTRY(page_fault) | ||
| 1066 | RING0_EC_FRAME | ||
| 1067 | pushl $do_page_fault | ||
| 1068 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1069 | ALIGN | ||
| 1070 | error_code: | ||
| 1071 | /* the function address is in %fs's slot on the stack */ | ||
| 1072 | pushl %es | ||
| 1073 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1074 | /*CFI_REL_OFFSET es, 0*/ | ||
| 1075 | pushl %ds | ||
| 1076 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1077 | /*CFI_REL_OFFSET ds, 0*/ | ||
| 1078 | pushl %eax | ||
| 1079 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1080 | CFI_REL_OFFSET eax, 0 | ||
| 1081 | pushl %ebp | ||
| 1082 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1083 | CFI_REL_OFFSET ebp, 0 | ||
| 1084 | pushl %edi | ||
| 1085 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1086 | CFI_REL_OFFSET edi, 0 | ||
| 1087 | pushl %esi | ||
| 1088 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1089 | CFI_REL_OFFSET esi, 0 | ||
| 1090 | pushl %edx | ||
| 1091 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1092 | CFI_REL_OFFSET edx, 0 | ||
| 1093 | pushl %ecx | ||
| 1094 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1095 | CFI_REL_OFFSET ecx, 0 | ||
| 1096 | pushl %ebx | ||
| 1097 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1098 | CFI_REL_OFFSET ebx, 0 | ||
| 1099 | cld | ||
| 1100 | pushl %fs | ||
| 1101 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1102 | /*CFI_REL_OFFSET fs, 0*/ | ||
| 1103 | movl $(__KERNEL_PERCPU), %ecx | ||
| 1104 | movl %ecx, %fs | ||
| 1105 | UNWIND_ESPFIX_STACK | ||
| 1106 | popl %ecx | ||
| 1107 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1108 | /*CFI_REGISTER es, ecx*/ | ||
| 1109 | movl PT_FS(%esp), %edi # get the function address | ||
| 1110 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
| 1111 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
| 1112 | mov %ecx, PT_FS(%esp) | ||
| 1113 | /*CFI_REL_OFFSET fs, ES*/ | ||
| 1114 | movl $(__USER_DS), %ecx | ||
| 1115 | movl %ecx, %ds | ||
| 1116 | movl %ecx, %es | ||
| 1117 | TRACE_IRQS_OFF | ||
| 1118 | movl %esp,%eax # pt_regs pointer | ||
| 1119 | call *%edi | ||
| 1120 | jmp ret_from_exception | ||
| 1121 | CFI_ENDPROC | ||
| 1122 | END(page_fault) | ||
| 1123 | |||
| 1124 | /* | ||
| 1125 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
| 1126 | * that sets up the real kernel stack. Check here, since we can't | ||
| 1127 | * allow the wrong stack to be used. | ||
| 1128 | * | ||
| 1129 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
| 1130 | * already pushed 3 words if it hits on the sysenter instruction: | ||
| 1131 | * eflags, cs and eip. | ||
| 1132 | * | ||
| 1133 | * We just load the right stack, and push the three (known) values | ||
| 1134 | * by hand onto the new stack - while updating the return eip past | ||
| 1135 | * the instruction that would have done it for sysenter. | ||
| 1136 | */ | ||
| 1137 | #define FIX_STACK(offset, ok, label) \ | ||
| 1138 | cmpw $__KERNEL_CS,4(%esp); \ | ||
| 1139 | jne ok; \ | ||
| 1140 | label: \ | ||
| 1141 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
| 1142 | CFI_DEF_CFA esp, 0; \ | ||
| 1143 | CFI_UNDEFINED eip; \ | ||
| 1144 | pushfl; \ | ||
| 1145 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 1146 | pushl $__KERNEL_CS; \ | ||
| 1147 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 1148 | pushl $sysenter_past_esp; \ | ||
| 1149 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 1150 | CFI_REL_OFFSET eip, 0 | ||
| 1151 | |||
| 1152 | ENTRY(debug) | ||
| 1153 | RING0_INT_FRAME | ||
| 1154 | cmpl $ia32_sysenter_target,(%esp) | ||
| 1155 | jne debug_stack_correct | ||
| 1156 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
| 1157 | debug_stack_correct: | ||
| 1158 | pushl $-1 # mark this as an int | ||
| 1159 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1160 | SAVE_ALL | ||
| 1161 | TRACE_IRQS_OFF | ||
| 1162 | xorl %edx,%edx # error code 0 | ||
| 1163 | movl %esp,%eax # pt_regs pointer | ||
| 1164 | call do_debug | ||
| 1165 | jmp ret_from_exception | ||
| 1166 | CFI_ENDPROC | ||
| 1167 | END(debug) | ||
| 1168 | |||
| 1169 | /* | ||
| 1170 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
| 1171 | * a debug fault, and the debug fault hasn't yet been able to | ||
| 1172 | * clear up the stack. So we first check whether we got an | ||
| 1173 | * NMI on the sysenter entry path, but after that we need to | ||
| 1174 | * check whether we got an NMI on the debug path where the debug | ||
| 1175 | * fault happened on the sysenter path. | ||
| 1176 | */ | ||
| 1177 | ENTRY(nmi) | ||
| 1178 | RING0_INT_FRAME | ||
| 1179 | pushl %eax | ||
| 1180 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1181 | movl %ss, %eax | ||
| 1182 | cmpw $__ESPFIX_SS, %ax | ||
| 1183 | popl %eax | ||
| 1184 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1185 | je nmi_espfix_stack | ||
| 1186 | cmpl $ia32_sysenter_target,(%esp) | ||
| 1187 | je nmi_stack_fixup | ||
| 1188 | pushl %eax | ||
| 1189 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1190 | movl %esp,%eax | ||
| 1191 | /* Do not access memory above the end of our stack page, | ||
| 1192 | * it might not exist. | ||
| 1193 | */ | ||
| 1194 | andl $(THREAD_SIZE-1),%eax | ||
| 1195 | cmpl $(THREAD_SIZE-20),%eax | ||
| 1196 | popl %eax | ||
| 1197 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1198 | jae nmi_stack_correct | ||
| 1199 | cmpl $ia32_sysenter_target,12(%esp) | ||
| 1200 | je nmi_debug_stack_check | ||
| 1201 | nmi_stack_correct: | ||
| 1202 | /* We have a RING0_INT_FRAME here */ | ||
| 1203 | pushl %eax | ||
| 1204 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1205 | SAVE_ALL | ||
| 1206 | TRACE_IRQS_OFF | ||
| 1207 | xorl %edx,%edx # zero error code | ||
| 1208 | movl %esp,%eax # pt_regs pointer | ||
| 1209 | call do_nmi | ||
| 1210 | jmp restore_nocheck_notrace | ||
| 1211 | CFI_ENDPROC | ||
| 1212 | |||
| 1213 | nmi_stack_fixup: | ||
| 1214 | RING0_INT_FRAME | ||
| 1215 | FIX_STACK(12,nmi_stack_correct, 1) | ||
| 1216 | jmp nmi_stack_correct | ||
| 1217 | |||
| 1218 | nmi_debug_stack_check: | ||
| 1219 | /* We have a RING0_INT_FRAME here */ | ||
| 1220 | cmpw $__KERNEL_CS,16(%esp) | ||
| 1221 | jne nmi_stack_correct | ||
| 1222 | cmpl $debug,(%esp) | ||
| 1223 | jb nmi_stack_correct | ||
| 1224 | cmpl $debug_esp_fix_insn,(%esp) | ||
| 1225 | ja nmi_stack_correct | ||
| 1226 | FIX_STACK(24,nmi_stack_correct, 1) | ||
| 1227 | jmp nmi_stack_correct | ||
| 1228 | |||
| 1229 | nmi_espfix_stack: | ||
| 1230 | /* We have a RING0_INT_FRAME here. | ||
| 1231 | * | ||
| 1232 | * create the pointer to lss back | ||
| 1233 | */ | ||
| 1234 | pushl %ss | ||
| 1235 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1236 | pushl %esp | ||
| 1237 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1238 | addw $4, (%esp) | ||
| 1239 | /* copy the iret frame of 12 bytes */ | ||
| 1240 | .rept 3 | ||
| 1241 | pushl 16(%esp) | ||
| 1242 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1243 | .endr | ||
| 1244 | pushl %eax | ||
| 1245 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1246 | SAVE_ALL | ||
| 1247 | TRACE_IRQS_OFF | ||
| 1248 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
| 1249 | xorl %edx,%edx # zero error code | ||
| 1250 | call do_nmi | ||
| 1251 | RESTORE_REGS | ||
| 1252 | lss 12+4(%esp), %esp # back to espfix stack | ||
| 1253 | CFI_ADJUST_CFA_OFFSET -24 | ||
| 1254 | jmp irq_return | ||
| 1255 | CFI_ENDPROC | ||
| 1256 | END(nmi) | ||
| 1257 | |||
| 1258 | ENTRY(int3) | ||
| 1259 | RING0_INT_FRAME | ||
| 1260 | pushl $-1 # mark this as an int | ||
| 1261 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1262 | SAVE_ALL | ||
| 1263 | TRACE_IRQS_OFF | ||
| 1264 | xorl %edx,%edx # zero error code | ||
| 1265 | movl %esp,%eax # pt_regs pointer | ||
| 1266 | call do_int3 | ||
| 1267 | jmp ret_from_exception | ||
| 1268 | CFI_ENDPROC | ||
| 1269 | END(int3) | ||
| 1270 | |||
| 1271 | ENTRY(general_protection) | ||
| 1272 | RING0_EC_FRAME | ||
| 1273 | pushl $do_general_protection | ||
| 1274 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1275 | jmp error_code | ||
| 1276 | CFI_ENDPROC | ||
| 1277 | END(general_protection) | ||
| 1278 | |||
| 1279 | /* | ||
| 1280 | * End of kprobes section | ||
| 1281 | */ | ||
| 1282 | .popsection | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..e28c7a987793 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -11,15 +11,15 @@ | |||
| 11 | * | 11 | * |
| 12 | * NOTE: This code handles signal-recognition, which happens every time | 12 | * NOTE: This code handles signal-recognition, which happens every time |
| 13 | * after an interrupt and after each system call. | 13 | * after an interrupt and after each system call. |
| 14 | * | 14 | * |
| 15 | * Normal syscalls and interrupts don't save a full stack frame, this is | 15 | * Normal syscalls and interrupts don't save a full stack frame, this is |
| 16 | * only done for syscall tracing, signals or fork/exec et al. | 16 | * only done for syscall tracing, signals or fork/exec et al. |
| 17 | * | 17 | * |
| 18 | * A note on terminology: | 18 | * A note on terminology: |
| 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
| 20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
| 21 | * - partial stack frame: partially saved registers up to R11. | 21 | * - partial stack frame: partially saved registers up to R11. |
| 22 | * - full stack frame: Like partial stack frame, but all registers saved. | 22 | * - full stack frame: Like partial stack frame, but all registers saved. |
| 23 | * | 23 | * |
| 24 | * Some macro usage: | 24 | * Some macro usage: |
| 25 | * - CFI macros are used to generate dwarf2 unwind information for better | 25 | * - CFI macros are used to generate dwarf2 unwind information for better |
| @@ -60,7 +60,6 @@ | |||
| 60 | #define __AUDIT_ARCH_LE 0x40000000 | 60 | #define __AUDIT_ARCH_LE 0x40000000 |
| 61 | 61 | ||
| 62 | .code64 | 62 | .code64 |
| 63 | |||
| 64 | #ifdef CONFIG_FUNCTION_TRACER | 63 | #ifdef CONFIG_FUNCTION_TRACER |
| 65 | #ifdef CONFIG_DYNAMIC_FTRACE | 64 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 66 | ENTRY(mcount) | 65 | ENTRY(mcount) |
| @@ -68,16 +67,10 @@ ENTRY(mcount) | |||
| 68 | END(mcount) | 67 | END(mcount) |
| 69 | 68 | ||
| 70 | ENTRY(ftrace_caller) | 69 | ENTRY(ftrace_caller) |
| 70 | cmpl $0, function_trace_stop | ||
| 71 | jne ftrace_stub | ||
| 71 | 72 | ||
| 72 | /* taken from glibc */ | 73 | MCOUNT_SAVE_FRAME |
| 73 | subq $0x38, %rsp | ||
| 74 | movq %rax, (%rsp) | ||
| 75 | movq %rcx, 8(%rsp) | ||
| 76 | movq %rdx, 16(%rsp) | ||
| 77 | movq %rsi, 24(%rsp) | ||
| 78 | movq %rdi, 32(%rsp) | ||
| 79 | movq %r8, 40(%rsp) | ||
| 80 | movq %r9, 48(%rsp) | ||
| 81 | 74 | ||
| 82 | movq 0x38(%rsp), %rdi | 75 | movq 0x38(%rsp), %rdi |
| 83 | movq 8(%rbp), %rsi | 76 | movq 8(%rbp), %rsi |
| @@ -87,14 +80,13 @@ ENTRY(ftrace_caller) | |||
| 87 | ftrace_call: | 80 | ftrace_call: |
| 88 | call ftrace_stub | 81 | call ftrace_stub |
| 89 | 82 | ||
| 90 | movq 48(%rsp), %r9 | 83 | MCOUNT_RESTORE_FRAME |
| 91 | movq 40(%rsp), %r8 | 84 | |
| 92 | movq 32(%rsp), %rdi | 85 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
| 93 | movq 24(%rsp), %rsi | 86 | .globl ftrace_graph_call |
| 94 | movq 16(%rsp), %rdx | 87 | ftrace_graph_call: |
| 95 | movq 8(%rsp), %rcx | 88 | jmp ftrace_stub |
| 96 | movq (%rsp), %rax | 89 | #endif |
| 97 | addq $0x38, %rsp | ||
| 98 | 90 | ||
| 99 | .globl ftrace_stub | 91 | .globl ftrace_stub |
| 100 | ftrace_stub: | 92 | ftrace_stub: |
| @@ -103,15 +95,63 @@ END(ftrace_caller) | |||
| 103 | 95 | ||
| 104 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 96 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
| 105 | ENTRY(mcount) | 97 | ENTRY(mcount) |
| 98 | cmpl $0, function_trace_stop | ||
| 99 | jne ftrace_stub | ||
| 100 | |||
| 106 | cmpq $ftrace_stub, ftrace_trace_function | 101 | cmpq $ftrace_stub, ftrace_trace_function |
| 107 | jnz trace | 102 | jnz trace |
| 103 | |||
| 104 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 105 | cmpq $ftrace_stub, ftrace_graph_return | ||
| 106 | jnz ftrace_graph_caller | ||
| 107 | |||
| 108 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry | ||
| 109 | jnz ftrace_graph_caller | ||
| 110 | #endif | ||
| 111 | |||
| 108 | .globl ftrace_stub | 112 | .globl ftrace_stub |
| 109 | ftrace_stub: | 113 | ftrace_stub: |
| 110 | retq | 114 | retq |
| 111 | 115 | ||
| 112 | trace: | 116 | trace: |
| 113 | /* taken from glibc */ | 117 | MCOUNT_SAVE_FRAME |
| 114 | subq $0x38, %rsp | 118 | |
| 119 | movq 0x38(%rsp), %rdi | ||
| 120 | movq 8(%rbp), %rsi | ||
| 121 | subq $MCOUNT_INSN_SIZE, %rdi | ||
| 122 | |||
| 123 | call *ftrace_trace_function | ||
| 124 | |||
| 125 | MCOUNT_RESTORE_FRAME | ||
| 126 | |||
| 127 | jmp ftrace_stub | ||
| 128 | END(mcount) | ||
| 129 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
| 130 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
| 131 | |||
| 132 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 133 | ENTRY(ftrace_graph_caller) | ||
| 134 | cmpl $0, function_trace_stop | ||
| 135 | jne ftrace_stub | ||
| 136 | |||
| 137 | MCOUNT_SAVE_FRAME | ||
| 138 | |||
| 139 | leaq 8(%rbp), %rdi | ||
| 140 | movq 0x38(%rsp), %rsi | ||
| 141 | subq $MCOUNT_INSN_SIZE, %rsi | ||
| 142 | |||
| 143 | call prepare_ftrace_return | ||
| 144 | |||
| 145 | MCOUNT_RESTORE_FRAME | ||
| 146 | |||
| 147 | retq | ||
| 148 | END(ftrace_graph_caller) | ||
| 149 | |||
| 150 | |||
| 151 | .globl return_to_handler | ||
| 152 | return_to_handler: | ||
| 153 | subq $80, %rsp | ||
| 154 | |||
| 115 | movq %rax, (%rsp) | 155 | movq %rax, (%rsp) |
| 116 | movq %rcx, 8(%rsp) | 156 | movq %rcx, 8(%rsp) |
| 117 | movq %rdx, 16(%rsp) | 157 | movq %rdx, 16(%rsp) |
| @@ -119,13 +159,14 @@ trace: | |||
| 119 | movq %rdi, 32(%rsp) | 159 | movq %rdi, 32(%rsp) |
| 120 | movq %r8, 40(%rsp) | 160 | movq %r8, 40(%rsp) |
| 121 | movq %r9, 48(%rsp) | 161 | movq %r9, 48(%rsp) |
| 162 | movq %r10, 56(%rsp) | ||
| 163 | movq %r11, 64(%rsp) | ||
| 122 | 164 | ||
| 123 | movq 0x38(%rsp), %rdi | 165 | call ftrace_return_to_handler |
| 124 | movq 8(%rbp), %rsi | ||
| 125 | subq $MCOUNT_INSN_SIZE, %rdi | ||
| 126 | |||
| 127 | call *ftrace_trace_function | ||
| 128 | 166 | ||
| 167 | movq %rax, 72(%rsp) | ||
| 168 | movq 64(%rsp), %r11 | ||
| 169 | movq 56(%rsp), %r10 | ||
| 129 | movq 48(%rsp), %r9 | 170 | movq 48(%rsp), %r9 |
| 130 | movq 40(%rsp), %r8 | 171 | movq 40(%rsp), %r8 |
| 131 | movq 32(%rsp), %rdi | 172 | movq 32(%rsp), %rdi |
| @@ -133,16 +174,14 @@ trace: | |||
| 133 | movq 16(%rsp), %rdx | 174 | movq 16(%rsp), %rdx |
| 134 | movq 8(%rsp), %rcx | 175 | movq 8(%rsp), %rcx |
| 135 | movq (%rsp), %rax | 176 | movq (%rsp), %rax |
| 136 | addq $0x38, %rsp | 177 | addq $72, %rsp |
| 178 | retq | ||
| 179 | #endif | ||
| 137 | 180 | ||
| 138 | jmp ftrace_stub | ||
| 139 | END(mcount) | ||
| 140 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
| 141 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
| 142 | 181 | ||
| 143 | #ifndef CONFIG_PREEMPT | 182 | #ifndef CONFIG_PREEMPT |
| 144 | #define retint_kernel retint_restore_args | 183 | #define retint_kernel retint_restore_args |
| 145 | #endif | 184 | #endif |
| 146 | 185 | ||
| 147 | #ifdef CONFIG_PARAVIRT | 186 | #ifdef CONFIG_PARAVIRT |
| 148 | ENTRY(native_usergs_sysret64) | 187 | ENTRY(native_usergs_sysret64) |
| @@ -161,29 +200,29 @@ ENTRY(native_usergs_sysret64) | |||
| 161 | .endm | 200 | .endm |
| 162 | 201 | ||
| 163 | /* | 202 | /* |
| 164 | * C code is not supposed to know about undefined top of stack. Every time | 203 | * C code is not supposed to know about undefined top of stack. Every time |
| 165 | * a C function with a pt_regs argument is called from the SYSCALL based | 204 | * a C function with a pt_regs argument is called from the SYSCALL based |
| 166 | * fast path FIXUP_TOP_OF_STACK is needed. | 205 | * fast path FIXUP_TOP_OF_STACK is needed. |
| 167 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 206 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
| 168 | * manipulation. | 207 | * manipulation. |
| 169 | */ | 208 | */ |
| 170 | 209 | ||
| 171 | /* %rsp:at FRAMEEND */ | 210 | /* %rsp:at FRAMEEND */ |
| 172 | .macro FIXUP_TOP_OF_STACK tmp | 211 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
| 173 | movq %gs:pda_oldrsp,\tmp | 212 | movq %gs:pda_oldrsp,\tmp |
| 174 | movq \tmp,RSP(%rsp) | 213 | movq \tmp,RSP+\offset(%rsp) |
| 175 | movq $__USER_DS,SS(%rsp) | 214 | movq $__USER_DS,SS+\offset(%rsp) |
| 176 | movq $__USER_CS,CS(%rsp) | 215 | movq $__USER_CS,CS+\offset(%rsp) |
| 177 | movq $-1,RCX(%rsp) | 216 | movq $-1,RCX+\offset(%rsp) |
| 178 | movq R11(%rsp),\tmp /* get eflags */ | 217 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
| 179 | movq \tmp,EFLAGS(%rsp) | 218 | movq \tmp,EFLAGS+\offset(%rsp) |
| 180 | .endm | 219 | .endm |
| 181 | 220 | ||
| 182 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 | 221 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
| 183 | movq RSP-\offset(%rsp),\tmp | 222 | movq RSP+\offset(%rsp),\tmp |
| 184 | movq \tmp,%gs:pda_oldrsp | 223 | movq \tmp,%gs:pda_oldrsp |
| 185 | movq EFLAGS-\offset(%rsp),\tmp | 224 | movq EFLAGS+\offset(%rsp),\tmp |
| 186 | movq \tmp,R11-\offset(%rsp) | 225 | movq \tmp,R11+\offset(%rsp) |
| 187 | .endm | 226 | .endm |
| 188 | 227 | ||
| 189 | .macro FAKE_STACK_FRAME child_rip | 228 | .macro FAKE_STACK_FRAME child_rip |
| @@ -195,7 +234,7 @@ ENTRY(native_usergs_sysret64) | |||
| 195 | pushq %rax /* rsp */ | 234 | pushq %rax /* rsp */ |
| 196 | CFI_ADJUST_CFA_OFFSET 8 | 235 | CFI_ADJUST_CFA_OFFSET 8 |
| 197 | CFI_REL_OFFSET rsp,0 | 236 | CFI_REL_OFFSET rsp,0 |
| 198 | pushq $(1<<9) /* eflags - interrupts on */ | 237 | pushq $X86_EFLAGS_IF /* eflags - interrupts on */ |
| 199 | CFI_ADJUST_CFA_OFFSET 8 | 238 | CFI_ADJUST_CFA_OFFSET 8 |
| 200 | /*CFI_REL_OFFSET rflags,0*/ | 239 | /*CFI_REL_OFFSET rflags,0*/ |
| 201 | pushq $__KERNEL_CS /* cs */ | 240 | pushq $__KERNEL_CS /* cs */ |
| @@ -213,62 +252,184 @@ ENTRY(native_usergs_sysret64) | |||
| 213 | CFI_ADJUST_CFA_OFFSET -(6*8) | 252 | CFI_ADJUST_CFA_OFFSET -(6*8) |
| 214 | .endm | 253 | .endm |
| 215 | 254 | ||
| 216 | .macro CFI_DEFAULT_STACK start=1 | 255 | /* |
| 256 | * initial frame state for interrupts (and exceptions without error code) | ||
| 257 | */ | ||
| 258 | .macro EMPTY_FRAME start=1 offset=0 | ||
| 217 | .if \start | 259 | .if \start |
| 218 | CFI_STARTPROC simple | 260 | CFI_STARTPROC simple |
| 219 | CFI_SIGNAL_FRAME | 261 | CFI_SIGNAL_FRAME |
| 220 | CFI_DEF_CFA rsp,SS+8 | 262 | CFI_DEF_CFA rsp,8+\offset |
| 221 | .else | 263 | .else |
| 222 | CFI_DEF_CFA_OFFSET SS+8 | 264 | CFI_DEF_CFA_OFFSET 8+\offset |
| 223 | .endif | 265 | .endif |
| 224 | CFI_REL_OFFSET r15,R15 | ||
| 225 | CFI_REL_OFFSET r14,R14 | ||
| 226 | CFI_REL_OFFSET r13,R13 | ||
| 227 | CFI_REL_OFFSET r12,R12 | ||
| 228 | CFI_REL_OFFSET rbp,RBP | ||
| 229 | CFI_REL_OFFSET rbx,RBX | ||
| 230 | CFI_REL_OFFSET r11,R11 | ||
| 231 | CFI_REL_OFFSET r10,R10 | ||
| 232 | CFI_REL_OFFSET r9,R9 | ||
| 233 | CFI_REL_OFFSET r8,R8 | ||
| 234 | CFI_REL_OFFSET rax,RAX | ||
| 235 | CFI_REL_OFFSET rcx,RCX | ||
| 236 | CFI_REL_OFFSET rdx,RDX | ||
| 237 | CFI_REL_OFFSET rsi,RSI | ||
| 238 | CFI_REL_OFFSET rdi,RDI | ||
| 239 | CFI_REL_OFFSET rip,RIP | ||
| 240 | /*CFI_REL_OFFSET cs,CS*/ | ||
| 241 | /*CFI_REL_OFFSET rflags,EFLAGS*/ | ||
| 242 | CFI_REL_OFFSET rsp,RSP | ||
| 243 | /*CFI_REL_OFFSET ss,SS*/ | ||
| 244 | .endm | 266 | .endm |
| 267 | |||
| 268 | /* | ||
| 269 | * initial frame state for interrupts (and exceptions without error code) | ||
| 270 | */ | ||
| 271 | .macro INTR_FRAME start=1 offset=0 | ||
| 272 | EMPTY_FRAME \start, SS+8+\offset-RIP | ||
| 273 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | ||
| 274 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | ||
| 275 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | ||
| 276 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | ||
| 277 | CFI_REL_OFFSET rip, RIP+\offset-RIP | ||
| 278 | .endm | ||
| 279 | |||
| 280 | /* | ||
| 281 | * initial frame state for exceptions with error code (and interrupts | ||
| 282 | * with vector already pushed) | ||
| 283 | */ | ||
| 284 | .macro XCPT_FRAME start=1 offset=0 | ||
| 285 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | ||
| 286 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ | ||
| 287 | .endm | ||
| 288 | |||
| 245 | /* | 289 | /* |
| 246 | * A newly forked process directly context switches into this. | 290 | * frame that enables calling into C. |
| 247 | */ | 291 | */ |
| 248 | /* rdi: prev */ | 292 | .macro PARTIAL_FRAME start=1 offset=0 |
| 293 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | ||
| 294 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | ||
| 295 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | ||
| 296 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | ||
| 297 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | ||
| 298 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | ||
| 299 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | ||
| 300 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | ||
| 301 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | ||
| 302 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | ||
| 303 | .endm | ||
| 304 | |||
| 305 | /* | ||
| 306 | * frame that enables passing a complete pt_regs to a C function. | ||
| 307 | */ | ||
| 308 | .macro DEFAULT_FRAME start=1 offset=0 | ||
| 309 | PARTIAL_FRAME \start, R11+\offset-R15 | ||
| 310 | CFI_REL_OFFSET rbx, RBX+\offset | ||
| 311 | CFI_REL_OFFSET rbp, RBP+\offset | ||
| 312 | CFI_REL_OFFSET r12, R12+\offset | ||
| 313 | CFI_REL_OFFSET r13, R13+\offset | ||
| 314 | CFI_REL_OFFSET r14, R14+\offset | ||
| 315 | CFI_REL_OFFSET r15, R15+\offset | ||
| 316 | .endm | ||
| 317 | |||
| 318 | /* save partial stack frame */ | ||
| 319 | ENTRY(save_args) | ||
| 320 | XCPT_FRAME | ||
| 321 | cld | ||
| 322 | movq_cfi rdi, RDI+16-ARGOFFSET | ||
| 323 | movq_cfi rsi, RSI+16-ARGOFFSET | ||
| 324 | movq_cfi rdx, RDX+16-ARGOFFSET | ||
| 325 | movq_cfi rcx, RCX+16-ARGOFFSET | ||
| 326 | movq_cfi rax, RAX+16-ARGOFFSET | ||
| 327 | movq_cfi r8, R8+16-ARGOFFSET | ||
| 328 | movq_cfi r9, R9+16-ARGOFFSET | ||
| 329 | movq_cfi r10, R10+16-ARGOFFSET | ||
| 330 | movq_cfi r11, R11+16-ARGOFFSET | ||
| 331 | |||
| 332 | leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ | ||
| 333 | movq_cfi rbp, 8 /* push %rbp */ | ||
| 334 | leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ | ||
| 335 | testl $3, CS(%rdi) | ||
| 336 | je 1f | ||
| 337 | SWAPGS | ||
| 338 | /* | ||
| 339 | * irqcount is used to check if a CPU is already on an interrupt stack | ||
| 340 | * or not. While this is essentially redundant with preempt_count it is | ||
| 341 | * a little cheaper to use a separate counter in the PDA (short of | ||
| 342 | * moving irq_enter into assembly, which would be too much work) | ||
| 343 | */ | ||
| 344 | 1: incl %gs:pda_irqcount | ||
| 345 | jne 2f | ||
| 346 | popq_cfi %rax /* move return address... */ | ||
| 347 | mov %gs:pda_irqstackptr,%rsp | ||
| 348 | EMPTY_FRAME 0 | ||
| 349 | pushq_cfi %rax /* ... to the new stack */ | ||
| 350 | /* | ||
| 351 | * We entered an interrupt context - irqs are off: | ||
| 352 | */ | ||
| 353 | 2: TRACE_IRQS_OFF | ||
| 354 | ret | ||
| 355 | CFI_ENDPROC | ||
| 356 | END(save_args) | ||
| 357 | |||
| 358 | ENTRY(save_rest) | ||
| 359 | PARTIAL_FRAME 1 REST_SKIP+8 | ||
| 360 | movq 5*8+16(%rsp), %r11 /* save return address */ | ||
| 361 | movq_cfi rbx, RBX+16 | ||
| 362 | movq_cfi rbp, RBP+16 | ||
| 363 | movq_cfi r12, R12+16 | ||
| 364 | movq_cfi r13, R13+16 | ||
| 365 | movq_cfi r14, R14+16 | ||
| 366 | movq_cfi r15, R15+16 | ||
| 367 | movq %r11, 8(%rsp) /* return address */ | ||
| 368 | FIXUP_TOP_OF_STACK %r11, 16 | ||
| 369 | ret | ||
| 370 | CFI_ENDPROC | ||
| 371 | END(save_rest) | ||
| 372 | |||
| 373 | /* save complete stack frame */ | ||
| 374 | ENTRY(save_paranoid) | ||
| 375 | XCPT_FRAME 1 RDI+8 | ||
| 376 | cld | ||
| 377 | movq_cfi rdi, RDI+8 | ||
| 378 | movq_cfi rsi, RSI+8 | ||
| 379 | movq_cfi rdx, RDX+8 | ||
| 380 | movq_cfi rcx, RCX+8 | ||
| 381 | movq_cfi rax, RAX+8 | ||
| 382 | movq_cfi r8, R8+8 | ||
| 383 | movq_cfi r9, R9+8 | ||
| 384 | movq_cfi r10, R10+8 | ||
| 385 | movq_cfi r11, R11+8 | ||
| 386 | movq_cfi rbx, RBX+8 | ||
| 387 | movq_cfi rbp, RBP+8 | ||
| 388 | movq_cfi r12, R12+8 | ||
| 389 | movq_cfi r13, R13+8 | ||
| 390 | movq_cfi r14, R14+8 | ||
| 391 | movq_cfi r15, R15+8 | ||
| 392 | movl $1,%ebx | ||
| 393 | movl $MSR_GS_BASE,%ecx | ||
| 394 | rdmsr | ||
| 395 | testl %edx,%edx | ||
| 396 | js 1f /* negative -> in kernel */ | ||
| 397 | SWAPGS | ||
| 398 | xorl %ebx,%ebx | ||
| 399 | 1: ret | ||
| 400 | CFI_ENDPROC | ||
| 401 | END(save_paranoid) | ||
| 402 | |||
| 403 | /* | ||
| 404 | * A newly forked process directly context switches into this address. | ||
| 405 | * | ||
| 406 | * rdi: prev task we switched from | ||
| 407 | */ | ||
| 249 | ENTRY(ret_from_fork) | 408 | ENTRY(ret_from_fork) |
| 250 | CFI_DEFAULT_STACK | 409 | DEFAULT_FRAME |
| 410 | |||
| 251 | push kernel_eflags(%rip) | 411 | push kernel_eflags(%rip) |
| 252 | CFI_ADJUST_CFA_OFFSET 8 | 412 | CFI_ADJUST_CFA_OFFSET 8 |
| 253 | popf # reset kernel eflags | 413 | popf # reset kernel eflags |
| 254 | CFI_ADJUST_CFA_OFFSET -8 | 414 | CFI_ADJUST_CFA_OFFSET -8 |
| 255 | call schedule_tail | 415 | |
| 416 | call schedule_tail # rdi: 'prev' task parameter | ||
| 417 | |||
| 256 | GET_THREAD_INFO(%rcx) | 418 | GET_THREAD_INFO(%rcx) |
| 257 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 419 | |
| 258 | jnz rff_trace | 420 | CFI_REMEMBER_STATE |
| 259 | rff_action: | ||
| 260 | RESTORE_REST | 421 | RESTORE_REST |
| 261 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | 422 | |
| 423 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | ||
| 262 | je int_ret_from_sys_call | 424 | je int_ret_from_sys_call |
| 263 | testl $_TIF_IA32,TI_flags(%rcx) | 425 | |
| 426 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | ||
| 264 | jnz int_ret_from_sys_call | 427 | jnz int_ret_from_sys_call |
| 265 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | 428 | |
| 266 | jmp ret_from_sys_call | 429 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
| 267 | rff_trace: | 430 | jmp ret_from_sys_call # go to the SYSRET fastpath |
| 268 | movq %rsp,%rdi | 431 | |
| 269 | call syscall_trace_leave | 432 | CFI_RESTORE_STATE |
| 270 | GET_THREAD_INFO(%rcx) | ||
| 271 | jmp rff_action | ||
| 272 | CFI_ENDPROC | 433 | CFI_ENDPROC |
| 273 | END(ret_from_fork) | 434 | END(ret_from_fork) |
| 274 | 435 | ||
| @@ -278,20 +439,20 @@ END(ret_from_fork) | |||
| 278 | * SYSCALL does not save anything on the stack and does not change the | 439 | * SYSCALL does not save anything on the stack and does not change the |
| 279 | * stack pointer. | 440 | * stack pointer. |
| 280 | */ | 441 | */ |
| 281 | 442 | ||
| 282 | /* | 443 | /* |
| 283 | * Register setup: | 444 | * Register setup: |
| 284 | * rax system call number | 445 | * rax system call number |
| 285 | * rdi arg0 | 446 | * rdi arg0 |
| 286 | * rcx return address for syscall/sysret, C arg3 | 447 | * rcx return address for syscall/sysret, C arg3 |
| 287 | * rsi arg1 | 448 | * rsi arg1 |
| 288 | * rdx arg2 | 449 | * rdx arg2 |
| 289 | * r10 arg3 (--> moved to rcx for C) | 450 | * r10 arg3 (--> moved to rcx for C) |
| 290 | * r8 arg4 | 451 | * r8 arg4 |
| 291 | * r9 arg5 | 452 | * r9 arg5 |
| 292 | * r11 eflags for syscall/sysret, temporary for C | 453 | * r11 eflags for syscall/sysret, temporary for C |
| 293 | * r12-r15,rbp,rbx saved by C code, not touched. | 454 | * r12-r15,rbp,rbx saved by C code, not touched. |
| 294 | * | 455 | * |
| 295 | * Interrupts are off on entry. | 456 | * Interrupts are off on entry. |
| 296 | * Only called from user space. | 457 | * Only called from user space. |
| 297 | * | 458 | * |
| @@ -301,7 +462,7 @@ END(ret_from_fork) | |||
| 301 | * When user can change the frames always force IRET. That is because | 462 | * When user can change the frames always force IRET. That is because |
| 302 | * it deals with uncanonical addresses better. SYSRET has trouble | 463 | * it deals with uncanonical addresses better. SYSRET has trouble |
| 303 | * with them due to bugs in both AMD and Intel CPUs. | 464 | * with them due to bugs in both AMD and Intel CPUs. |
| 304 | */ | 465 | */ |
| 305 | 466 | ||
| 306 | ENTRY(system_call) | 467 | ENTRY(system_call) |
| 307 | CFI_STARTPROC simple | 468 | CFI_STARTPROC simple |
| @@ -317,7 +478,7 @@ ENTRY(system_call) | |||
| 317 | */ | 478 | */ |
| 318 | ENTRY(system_call_after_swapgs) | 479 | ENTRY(system_call_after_swapgs) |
| 319 | 480 | ||
| 320 | movq %rsp,%gs:pda_oldrsp | 481 | movq %rsp,%gs:pda_oldrsp |
| 321 | movq %gs:pda_kernelstack,%rsp | 482 | movq %gs:pda_kernelstack,%rsp |
| 322 | /* | 483 | /* |
| 323 | * No need to follow this irqs off/on section - it's straight | 484 | * No need to follow this irqs off/on section - it's straight |
| @@ -325,7 +486,7 @@ ENTRY(system_call_after_swapgs) | |||
| 325 | */ | 486 | */ |
| 326 | ENABLE_INTERRUPTS(CLBR_NONE) | 487 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 327 | SAVE_ARGS 8,1 | 488 | SAVE_ARGS 8,1 |
| 328 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 489 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
| 329 | movq %rcx,RIP-ARGOFFSET(%rsp) | 490 | movq %rcx,RIP-ARGOFFSET(%rsp) |
| 330 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 491 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
| 331 | GET_THREAD_INFO(%rcx) | 492 | GET_THREAD_INFO(%rcx) |
| @@ -339,19 +500,19 @@ system_call_fastpath: | |||
| 339 | movq %rax,RAX-ARGOFFSET(%rsp) | 500 | movq %rax,RAX-ARGOFFSET(%rsp) |
| 340 | /* | 501 | /* |
| 341 | * Syscall return path ending with SYSRET (fast path) | 502 | * Syscall return path ending with SYSRET (fast path) |
| 342 | * Has incomplete stack frame and undefined top of stack. | 503 | * Has incomplete stack frame and undefined top of stack. |
| 343 | */ | 504 | */ |
| 344 | ret_from_sys_call: | 505 | ret_from_sys_call: |
| 345 | movl $_TIF_ALLWORK_MASK,%edi | 506 | movl $_TIF_ALLWORK_MASK,%edi |
| 346 | /* edi: flagmask */ | 507 | /* edi: flagmask */ |
| 347 | sysret_check: | 508 | sysret_check: |
| 348 | LOCKDEP_SYS_EXIT | 509 | LOCKDEP_SYS_EXIT |
| 349 | GET_THREAD_INFO(%rcx) | 510 | GET_THREAD_INFO(%rcx) |
| 350 | DISABLE_INTERRUPTS(CLBR_NONE) | 511 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 351 | TRACE_IRQS_OFF | 512 | TRACE_IRQS_OFF |
| 352 | movl TI_flags(%rcx),%edx | 513 | movl TI_flags(%rcx),%edx |
| 353 | andl %edi,%edx | 514 | andl %edi,%edx |
| 354 | jnz sysret_careful | 515 | jnz sysret_careful |
| 355 | CFI_REMEMBER_STATE | 516 | CFI_REMEMBER_STATE |
| 356 | /* | 517 | /* |
| 357 | * sysretq will re-enable interrupts: | 518 | * sysretq will re-enable interrupts: |
| @@ -366,7 +527,7 @@ sysret_check: | |||
| 366 | 527 | ||
| 367 | CFI_RESTORE_STATE | 528 | CFI_RESTORE_STATE |
| 368 | /* Handle reschedules */ | 529 | /* Handle reschedules */ |
| 369 | /* edx: work, edi: workmask */ | 530 | /* edx: work, edi: workmask */ |
| 370 | sysret_careful: | 531 | sysret_careful: |
| 371 | bt $TIF_NEED_RESCHED,%edx | 532 | bt $TIF_NEED_RESCHED,%edx |
| 372 | jnc sysret_signal | 533 | jnc sysret_signal |
| @@ -379,7 +540,7 @@ sysret_careful: | |||
| 379 | CFI_ADJUST_CFA_OFFSET -8 | 540 | CFI_ADJUST_CFA_OFFSET -8 |
| 380 | jmp sysret_check | 541 | jmp sysret_check |
| 381 | 542 | ||
| 382 | /* Handle a signal */ | 543 | /* Handle a signal */ |
| 383 | sysret_signal: | 544 | sysret_signal: |
| 384 | TRACE_IRQS_ON | 545 | TRACE_IRQS_ON |
| 385 | ENABLE_INTERRUPTS(CLBR_NONE) | 546 | ENABLE_INTERRUPTS(CLBR_NONE) |
| @@ -388,17 +549,20 @@ sysret_signal: | |||
| 388 | jc sysret_audit | 549 | jc sysret_audit |
| 389 | #endif | 550 | #endif |
| 390 | /* edx: work flags (arg3) */ | 551 | /* edx: work flags (arg3) */ |
| 391 | leaq do_notify_resume(%rip),%rax | ||
| 392 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 552 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
| 393 | xorl %esi,%esi # oldset -> arg2 | 553 | xorl %esi,%esi # oldset -> arg2 |
| 394 | call ptregscall_common | 554 | SAVE_REST |
| 555 | FIXUP_TOP_OF_STACK %r11 | ||
| 556 | call do_notify_resume | ||
| 557 | RESTORE_TOP_OF_STACK %r11 | ||
| 558 | RESTORE_REST | ||
| 395 | movl $_TIF_WORK_MASK,%edi | 559 | movl $_TIF_WORK_MASK,%edi |
| 396 | /* Use IRET because user could have changed frame. This | 560 | /* Use IRET because user could have changed frame. This |
| 397 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 561 | works because FIXUP_TOP_OF_STACK has been called above. */ |
| 398 | DISABLE_INTERRUPTS(CLBR_NONE) | 562 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 399 | TRACE_IRQS_OFF | 563 | TRACE_IRQS_OFF |
| 400 | jmp int_with_check | 564 | jmp int_with_check |
| 401 | 565 | ||
| 402 | badsys: | 566 | badsys: |
| 403 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 567 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
| 404 | jmp ret_from_sys_call | 568 | jmp ret_from_sys_call |
| @@ -437,7 +601,7 @@ sysret_audit: | |||
| 437 | #endif /* CONFIG_AUDITSYSCALL */ | 601 | #endif /* CONFIG_AUDITSYSCALL */ |
| 438 | 602 | ||
| 439 | /* Do syscall tracing */ | 603 | /* Do syscall tracing */ |
| 440 | tracesys: | 604 | tracesys: |
| 441 | #ifdef CONFIG_AUDITSYSCALL | 605 | #ifdef CONFIG_AUDITSYSCALL |
| 442 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 606 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) |
| 443 | jz auditsys | 607 | jz auditsys |
| @@ -460,8 +624,8 @@ tracesys: | |||
| 460 | call *sys_call_table(,%rax,8) | 624 | call *sys_call_table(,%rax,8) |
| 461 | movq %rax,RAX-ARGOFFSET(%rsp) | 625 | movq %rax,RAX-ARGOFFSET(%rsp) |
| 462 | /* Use IRET because user could have changed frame */ | 626 | /* Use IRET because user could have changed frame */ |
| 463 | 627 | ||
| 464 | /* | 628 | /* |
| 465 | * Syscall return path ending with IRET. | 629 | * Syscall return path ending with IRET. |
| 466 | * Has correct top of stack, but partial stack frame. | 630 | * Has correct top of stack, but partial stack frame. |
| 467 | */ | 631 | */ |
| @@ -505,18 +669,18 @@ int_very_careful: | |||
| 505 | TRACE_IRQS_ON | 669 | TRACE_IRQS_ON |
| 506 | ENABLE_INTERRUPTS(CLBR_NONE) | 670 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 507 | SAVE_REST | 671 | SAVE_REST |
| 508 | /* Check for syscall exit trace */ | 672 | /* Check for syscall exit trace */ |
| 509 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 673 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
| 510 | jz int_signal | 674 | jz int_signal |
| 511 | pushq %rdi | 675 | pushq %rdi |
| 512 | CFI_ADJUST_CFA_OFFSET 8 | 676 | CFI_ADJUST_CFA_OFFSET 8 |
| 513 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 677 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
| 514 | call syscall_trace_leave | 678 | call syscall_trace_leave |
| 515 | popq %rdi | 679 | popq %rdi |
| 516 | CFI_ADJUST_CFA_OFFSET -8 | 680 | CFI_ADJUST_CFA_OFFSET -8 |
| 517 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 681 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
| 518 | jmp int_restore_rest | 682 | jmp int_restore_rest |
| 519 | 683 | ||
| 520 | int_signal: | 684 | int_signal: |
| 521 | testl $_TIF_DO_NOTIFY_MASK,%edx | 685 | testl $_TIF_DO_NOTIFY_MASK,%edx |
| 522 | jz 1f | 686 | jz 1f |
| @@ -531,22 +695,24 @@ int_restore_rest: | |||
| 531 | jmp int_with_check | 695 | jmp int_with_check |
| 532 | CFI_ENDPROC | 696 | CFI_ENDPROC |
| 533 | END(system_call) | 697 | END(system_call) |
| 534 | 698 | ||
| 535 | /* | 699 | /* |
| 536 | * Certain special system calls that need to save a complete full stack frame. | 700 | * Certain special system calls that need to save a complete full stack frame. |
| 537 | */ | 701 | */ |
| 538 | |||
| 539 | .macro PTREGSCALL label,func,arg | 702 | .macro PTREGSCALL label,func,arg |
| 540 | .globl \label | 703 | ENTRY(\label) |
| 541 | \label: | 704 | PARTIAL_FRAME 1 8 /* offset 8: return address */ |
| 542 | leaq \func(%rip),%rax | 705 | subq $REST_SKIP, %rsp |
| 543 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 706 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
| 544 | jmp ptregscall_common | 707 | call save_rest |
| 708 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
| 709 | leaq 8(%rsp), \arg /* pt_regs pointer */ | ||
| 710 | call \func | ||
| 711 | jmp ptregscall_common | ||
| 712 | CFI_ENDPROC | ||
| 545 | END(\label) | 713 | END(\label) |
| 546 | .endm | 714 | .endm |
| 547 | 715 | ||
| 548 | CFI_STARTPROC | ||
| 549 | |||
| 550 | PTREGSCALL stub_clone, sys_clone, %r8 | 716 | PTREGSCALL stub_clone, sys_clone, %r8 |
| 551 | PTREGSCALL stub_fork, sys_fork, %rdi | 717 | PTREGSCALL stub_fork, sys_fork, %rdi |
| 552 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 718 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
| @@ -554,25 +720,18 @@ END(\label) | |||
| 554 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 720 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
| 555 | 721 | ||
| 556 | ENTRY(ptregscall_common) | 722 | ENTRY(ptregscall_common) |
| 557 | popq %r11 | 723 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
| 558 | CFI_ADJUST_CFA_OFFSET -8 | 724 | RESTORE_TOP_OF_STACK %r11, 8 |
| 559 | CFI_REGISTER rip, r11 | 725 | movq_cfi_restore R15+8, r15 |
| 560 | SAVE_REST | 726 | movq_cfi_restore R14+8, r14 |
| 561 | movq %r11, %r15 | 727 | movq_cfi_restore R13+8, r13 |
| 562 | CFI_REGISTER rip, r15 | 728 | movq_cfi_restore R12+8, r12 |
| 563 | FIXUP_TOP_OF_STACK %r11 | 729 | movq_cfi_restore RBP+8, rbp |
| 564 | call *%rax | 730 | movq_cfi_restore RBX+8, rbx |
| 565 | RESTORE_TOP_OF_STACK %r11 | 731 | ret $REST_SKIP /* pop extended registers */ |
| 566 | movq %r15, %r11 | ||
| 567 | CFI_REGISTER rip, r11 | ||
| 568 | RESTORE_REST | ||
| 569 | pushq %r11 | ||
| 570 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 571 | CFI_REL_OFFSET rip, 0 | ||
| 572 | ret | ||
| 573 | CFI_ENDPROC | 732 | CFI_ENDPROC |
| 574 | END(ptregscall_common) | 733 | END(ptregscall_common) |
| 575 | 734 | ||
| 576 | ENTRY(stub_execve) | 735 | ENTRY(stub_execve) |
| 577 | CFI_STARTPROC | 736 | CFI_STARTPROC |
| 578 | popq %r11 | 737 | popq %r11 |
| @@ -588,11 +747,11 @@ ENTRY(stub_execve) | |||
| 588 | jmp int_ret_from_sys_call | 747 | jmp int_ret_from_sys_call |
| 589 | CFI_ENDPROC | 748 | CFI_ENDPROC |
| 590 | END(stub_execve) | 749 | END(stub_execve) |
| 591 | 750 | ||
| 592 | /* | 751 | /* |
| 593 | * sigreturn is special because it needs to restore all registers on return. | 752 | * sigreturn is special because it needs to restore all registers on return. |
| 594 | * This cannot be done with SYSRET, so use the IRET return path instead. | 753 | * This cannot be done with SYSRET, so use the IRET return path instead. |
| 595 | */ | 754 | */ |
| 596 | ENTRY(stub_rt_sigreturn) | 755 | ENTRY(stub_rt_sigreturn) |
| 597 | CFI_STARTPROC | 756 | CFI_STARTPROC |
| 598 | addq $8, %rsp | 757 | addq $8, %rsp |
| @@ -608,70 +767,70 @@ ENTRY(stub_rt_sigreturn) | |||
| 608 | END(stub_rt_sigreturn) | 767 | END(stub_rt_sigreturn) |
| 609 | 768 | ||
| 610 | /* | 769 | /* |
| 611 | * initial frame state for interrupts and exceptions | 770 | * Build the entry stubs and pointer table with some assembler magic. |
| 771 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | ||
| 772 | * single cache line on all modern x86 implementations. | ||
| 612 | */ | 773 | */ |
| 613 | .macro _frame ref | 774 | .section .init.rodata,"a" |
| 614 | CFI_STARTPROC simple | 775 | ENTRY(interrupt) |
| 615 | CFI_SIGNAL_FRAME | 776 | .text |
| 616 | CFI_DEF_CFA rsp,SS+8-\ref | 777 | .p2align 5 |
| 617 | /*CFI_REL_OFFSET ss,SS-\ref*/ | 778 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
| 618 | CFI_REL_OFFSET rsp,RSP-\ref | 779 | ENTRY(irq_entries_start) |
| 619 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | 780 | INTR_FRAME |
| 620 | /*CFI_REL_OFFSET cs,CS-\ref*/ | 781 | vector=FIRST_EXTERNAL_VECTOR |
| 621 | CFI_REL_OFFSET rip,RIP-\ref | 782 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
| 622 | .endm | 783 | .balign 32 |
| 784 | .rept 7 | ||
| 785 | .if vector < NR_VECTORS | ||
| 786 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
| 787 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 788 | .endif | ||
| 789 | 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ | ||
| 790 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 791 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | ||
| 792 | jmp 2f | ||
| 793 | .endif | ||
| 794 | .previous | ||
| 795 | .quad 1b | ||
| 796 | .text | ||
| 797 | vector=vector+1 | ||
| 798 | .endif | ||
| 799 | .endr | ||
| 800 | 2: jmp common_interrupt | ||
| 801 | .endr | ||
| 802 | CFI_ENDPROC | ||
| 803 | END(irq_entries_start) | ||
| 623 | 804 | ||
| 624 | /* initial frame state for interrupts (and exceptions without error code) */ | 805 | .previous |
| 625 | #define INTR_FRAME _frame RIP | 806 | END(interrupt) |
| 626 | /* initial frame state for exceptions with error code (and interrupts with | 807 | .previous |
| 627 | vector already pushed) */ | ||
| 628 | #define XCPT_FRAME _frame ORIG_RAX | ||
| 629 | 808 | ||
| 630 | /* | 809 | /* |
| 631 | * Interrupt entry/exit. | 810 | * Interrupt entry/exit. |
| 632 | * | 811 | * |
| 633 | * Interrupt entry points save only callee clobbered registers in fast path. | 812 | * Interrupt entry points save only callee clobbered registers in fast path. |
| 634 | * | 813 | * |
| 635 | * Entry runs with interrupts off. | 814 | * Entry runs with interrupts off. |
| 636 | */ | 815 | */ |
| 637 | 816 | ||
| 638 | /* 0(%rsp): interrupt number */ | 817 | /* 0(%rsp): ~(interrupt number) */ |
| 639 | .macro interrupt func | 818 | .macro interrupt func |
| 640 | cld | 819 | subq $10*8, %rsp |
| 641 | SAVE_ARGS | 820 | CFI_ADJUST_CFA_OFFSET 10*8 |
| 642 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 821 | call save_args |
| 643 | pushq %rbp | 822 | PARTIAL_FRAME 0 |
| 644 | /* | ||
| 645 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
| 646 | * other, to fill pt_regs properly. This is because bx comes right | ||
| 647 | * before the last saved register in that structure, and not bp. If the | ||
| 648 | * base pointer were in the place bx is today, this would not be needed. | ||
| 649 | */ | ||
| 650 | movq %rbp, -8(%rsp) | ||
| 651 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 652 | CFI_REL_OFFSET rbp, 0 | ||
| 653 | movq %rsp,%rbp | ||
| 654 | CFI_DEF_CFA_REGISTER rbp | ||
| 655 | testl $3,CS(%rdi) | ||
| 656 | je 1f | ||
| 657 | SWAPGS | ||
| 658 | /* irqcount is used to check if a CPU is already on an interrupt | ||
| 659 | stack or not. While this is essentially redundant with preempt_count | ||
| 660 | it is a little cheaper to use a separate counter in the PDA | ||
| 661 | (short of moving irq_enter into assembly, which would be too | ||
| 662 | much work) */ | ||
| 663 | 1: incl %gs:pda_irqcount | ||
| 664 | cmoveq %gs:pda_irqstackptr,%rsp | ||
| 665 | push %rbp # backlink for old unwinder | ||
| 666 | /* | ||
| 667 | * We entered an interrupt context - irqs are off: | ||
| 668 | */ | ||
| 669 | TRACE_IRQS_OFF | ||
| 670 | call \func | 823 | call \func |
| 671 | .endm | 824 | .endm |
| 672 | 825 | ||
| 673 | ENTRY(common_interrupt) | 826 | /* |
| 827 | * The interrupt stubs push (~vector+0x80) onto the stack and | ||
| 828 | * then jump to common_interrupt. | ||
| 829 | */ | ||
| 830 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
| 831 | common_interrupt: | ||
| 674 | XCPT_FRAME | 832 | XCPT_FRAME |
| 833 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | ||
| 675 | interrupt do_IRQ | 834 | interrupt do_IRQ |
| 676 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 835 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
| 677 | ret_from_intr: | 836 | ret_from_intr: |
| @@ -685,12 +844,12 @@ exit_intr: | |||
| 685 | GET_THREAD_INFO(%rcx) | 844 | GET_THREAD_INFO(%rcx) |
| 686 | testl $3,CS-ARGOFFSET(%rsp) | 845 | testl $3,CS-ARGOFFSET(%rsp) |
| 687 | je retint_kernel | 846 | je retint_kernel |
| 688 | 847 | ||
| 689 | /* Interrupt came from user space */ | 848 | /* Interrupt came from user space */ |
| 690 | /* | 849 | /* |
| 691 | * Has a correct top of stack, but a partial stack frame | 850 | * Has a correct top of stack, but a partial stack frame |
| 692 | * %rcx: thread info. Interrupts off. | 851 | * %rcx: thread info. Interrupts off. |
| 693 | */ | 852 | */ |
| 694 | retint_with_reschedule: | 853 | retint_with_reschedule: |
| 695 | movl $_TIF_WORK_MASK,%edi | 854 | movl $_TIF_WORK_MASK,%edi |
| 696 | retint_check: | 855 | retint_check: |
| @@ -763,20 +922,20 @@ retint_careful: | |||
| 763 | pushq %rdi | 922 | pushq %rdi |
| 764 | CFI_ADJUST_CFA_OFFSET 8 | 923 | CFI_ADJUST_CFA_OFFSET 8 |
| 765 | call schedule | 924 | call schedule |
| 766 | popq %rdi | 925 | popq %rdi |
| 767 | CFI_ADJUST_CFA_OFFSET -8 | 926 | CFI_ADJUST_CFA_OFFSET -8 |
| 768 | GET_THREAD_INFO(%rcx) | 927 | GET_THREAD_INFO(%rcx) |
| 769 | DISABLE_INTERRUPTS(CLBR_NONE) | 928 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 770 | TRACE_IRQS_OFF | 929 | TRACE_IRQS_OFF |
| 771 | jmp retint_check | 930 | jmp retint_check |
| 772 | 931 | ||
| 773 | retint_signal: | 932 | retint_signal: |
| 774 | testl $_TIF_DO_NOTIFY_MASK,%edx | 933 | testl $_TIF_DO_NOTIFY_MASK,%edx |
| 775 | jz retint_swapgs | 934 | jz retint_swapgs |
| 776 | TRACE_IRQS_ON | 935 | TRACE_IRQS_ON |
| 777 | ENABLE_INTERRUPTS(CLBR_NONE) | 936 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 778 | SAVE_REST | 937 | SAVE_REST |
| 779 | movq $-1,ORIG_RAX(%rsp) | 938 | movq $-1,ORIG_RAX(%rsp) |
| 780 | xorl %esi,%esi # oldset | 939 | xorl %esi,%esi # oldset |
| 781 | movq %rsp,%rdi # &pt_regs | 940 | movq %rsp,%rdi # &pt_regs |
| 782 | call do_notify_resume | 941 | call do_notify_resume |
| @@ -798,324 +957,211 @@ ENTRY(retint_kernel) | |||
| 798 | jnc retint_restore_args | 957 | jnc retint_restore_args |
| 799 | call preempt_schedule_irq | 958 | call preempt_schedule_irq |
| 800 | jmp exit_intr | 959 | jmp exit_intr |
| 801 | #endif | 960 | #endif |
| 802 | 961 | ||
| 803 | CFI_ENDPROC | 962 | CFI_ENDPROC |
| 804 | END(common_interrupt) | 963 | END(common_interrupt) |
| 805 | 964 | ||
| 806 | /* | 965 | /* |
| 807 | * APIC interrupts. | 966 | * APIC interrupts. |
| 808 | */ | 967 | */ |
| 809 | .macro apicinterrupt num,func | 968 | .macro apicinterrupt num sym do_sym |
| 969 | ENTRY(\sym) | ||
| 810 | INTR_FRAME | 970 | INTR_FRAME |
| 811 | pushq $~(\num) | 971 | pushq $~(\num) |
| 812 | CFI_ADJUST_CFA_OFFSET 8 | 972 | CFI_ADJUST_CFA_OFFSET 8 |
| 813 | interrupt \func | 973 | interrupt \do_sym |
| 814 | jmp ret_from_intr | 974 | jmp ret_from_intr |
| 815 | CFI_ENDPROC | 975 | CFI_ENDPROC |
| 816 | .endm | 976 | END(\sym) |
| 817 | 977 | .endm | |
| 818 | ENTRY(thermal_interrupt) | ||
| 819 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | ||
| 820 | END(thermal_interrupt) | ||
| 821 | |||
| 822 | ENTRY(threshold_interrupt) | ||
| 823 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | ||
| 824 | END(threshold_interrupt) | ||
| 825 | |||
| 826 | #ifdef CONFIG_SMP | ||
| 827 | ENTRY(reschedule_interrupt) | ||
| 828 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | ||
| 829 | END(reschedule_interrupt) | ||
| 830 | |||
| 831 | .macro INVALIDATE_ENTRY num | ||
| 832 | ENTRY(invalidate_interrupt\num) | ||
| 833 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt | ||
| 834 | END(invalidate_interrupt\num) | ||
| 835 | .endm | ||
| 836 | 978 | ||
| 837 | INVALIDATE_ENTRY 0 | 979 | #ifdef CONFIG_SMP |
| 838 | INVALIDATE_ENTRY 1 | 980 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
| 839 | INVALIDATE_ENTRY 2 | 981 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
| 840 | INVALIDATE_ENTRY 3 | ||
| 841 | INVALIDATE_ENTRY 4 | ||
| 842 | INVALIDATE_ENTRY 5 | ||
| 843 | INVALIDATE_ENTRY 6 | ||
| 844 | INVALIDATE_ENTRY 7 | ||
| 845 | |||
| 846 | ENTRY(call_function_interrupt) | ||
| 847 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | ||
| 848 | END(call_function_interrupt) | ||
| 849 | ENTRY(call_function_single_interrupt) | ||
| 850 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt | ||
| 851 | END(call_function_single_interrupt) | ||
| 852 | ENTRY(irq_move_cleanup_interrupt) | ||
| 853 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | ||
| 854 | END(irq_move_cleanup_interrupt) | ||
| 855 | #endif | 982 | #endif |
| 856 | 983 | ||
| 857 | ENTRY(apic_timer_interrupt) | 984 | apicinterrupt UV_BAU_MESSAGE \ |
| 858 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 985 | uv_bau_message_intr1 uv_bau_message_interrupt |
| 859 | END(apic_timer_interrupt) | 986 | apicinterrupt LOCAL_TIMER_VECTOR \ |
| 987 | apic_timer_interrupt smp_apic_timer_interrupt | ||
| 988 | |||
| 989 | #ifdef CONFIG_SMP | ||
| 990 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | ||
| 991 | invalidate_interrupt0 smp_invalidate_interrupt | ||
| 992 | apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ | ||
| 993 | invalidate_interrupt1 smp_invalidate_interrupt | ||
| 994 | apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ | ||
| 995 | invalidate_interrupt2 smp_invalidate_interrupt | ||
| 996 | apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ | ||
| 997 | invalidate_interrupt3 smp_invalidate_interrupt | ||
| 998 | apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ | ||
| 999 | invalidate_interrupt4 smp_invalidate_interrupt | ||
| 1000 | apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ | ||
| 1001 | invalidate_interrupt5 smp_invalidate_interrupt | ||
| 1002 | apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ | ||
| 1003 | invalidate_interrupt6 smp_invalidate_interrupt | ||
| 1004 | apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | ||
| 1005 | invalidate_interrupt7 smp_invalidate_interrupt | ||
| 1006 | #endif | ||
| 860 | 1007 | ||
| 861 | ENTRY(uv_bau_message_intr1) | 1008 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
| 862 | apicinterrupt 220,uv_bau_message_interrupt | 1009 | threshold_interrupt mce_threshold_interrupt |
| 863 | END(uv_bau_message_intr1) | 1010 | apicinterrupt THERMAL_APIC_VECTOR \ |
| 1011 | thermal_interrupt smp_thermal_interrupt | ||
| 1012 | |||
| 1013 | #ifdef CONFIG_SMP | ||
| 1014 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | ||
| 1015 | call_function_single_interrupt smp_call_function_single_interrupt | ||
| 1016 | apicinterrupt CALL_FUNCTION_VECTOR \ | ||
| 1017 | call_function_interrupt smp_call_function_interrupt | ||
| 1018 | apicinterrupt RESCHEDULE_VECTOR \ | ||
| 1019 | reschedule_interrupt smp_reschedule_interrupt | ||
| 1020 | #endif | ||
| 864 | 1021 | ||
| 865 | ENTRY(error_interrupt) | 1022 | apicinterrupt ERROR_APIC_VECTOR \ |
| 866 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | 1023 | error_interrupt smp_error_interrupt |
| 867 | END(error_interrupt) | 1024 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
| 1025 | spurious_interrupt smp_spurious_interrupt | ||
| 868 | 1026 | ||
| 869 | ENTRY(spurious_interrupt) | ||
| 870 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | ||
| 871 | END(spurious_interrupt) | ||
| 872 | |||
| 873 | /* | 1027 | /* |
| 874 | * Exception entry points. | 1028 | * Exception entry points. |
| 875 | */ | 1029 | */ |
| 876 | .macro zeroentry sym | 1030 | .macro zeroentry sym do_sym |
| 1031 | ENTRY(\sym) | ||
| 877 | INTR_FRAME | 1032 | INTR_FRAME |
| 878 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1033 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 879 | pushq $0 /* push error code/oldrax */ | 1034 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 880 | CFI_ADJUST_CFA_OFFSET 8 | 1035 | subq $15*8,%rsp |
| 881 | pushq %rax /* push real oldrax to the rdi slot */ | 1036 | CFI_ADJUST_CFA_OFFSET 15*8 |
| 882 | CFI_ADJUST_CFA_OFFSET 8 | 1037 | call error_entry |
| 883 | CFI_REL_OFFSET rax,0 | 1038 | DEFAULT_FRAME 0 |
| 884 | leaq \sym(%rip),%rax | 1039 | movq %rsp,%rdi /* pt_regs pointer */ |
| 885 | jmp error_entry | 1040 | xorl %esi,%esi /* no error code */ |
| 1041 | call \do_sym | ||
| 1042 | jmp error_exit /* %ebx: no swapgs flag */ | ||
| 886 | CFI_ENDPROC | 1043 | CFI_ENDPROC |
| 887 | .endm | 1044 | END(\sym) |
| 1045 | .endm | ||
| 888 | 1046 | ||
| 889 | .macro errorentry sym | 1047 | .macro paranoidzeroentry sym do_sym |
| 890 | XCPT_FRAME | 1048 | ENTRY(\sym) |
| 1049 | INTR_FRAME | ||
| 891 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1050 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 892 | pushq %rax | 1051 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
| 893 | CFI_ADJUST_CFA_OFFSET 8 | 1052 | CFI_ADJUST_CFA_OFFSET 8 |
| 894 | CFI_REL_OFFSET rax,0 | 1053 | subq $15*8, %rsp |
| 895 | leaq \sym(%rip),%rax | 1054 | call save_paranoid |
| 896 | jmp error_entry | 1055 | TRACE_IRQS_OFF |
| 1056 | movq %rsp,%rdi /* pt_regs pointer */ | ||
| 1057 | xorl %esi,%esi /* no error code */ | ||
| 1058 | call \do_sym | ||
| 1059 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
| 897 | CFI_ENDPROC | 1060 | CFI_ENDPROC |
| 898 | .endm | 1061 | END(\sym) |
| 1062 | .endm | ||
| 899 | 1063 | ||
| 900 | /* error code is on the stack already */ | 1064 | .macro paranoidzeroentry_ist sym do_sym ist |
| 901 | /* handle NMI like exceptions that can happen everywhere */ | 1065 | ENTRY(\sym) |
| 902 | .macro paranoidentry sym, ist=0, irqtrace=1 | 1066 | INTR_FRAME |
| 903 | SAVE_ALL | 1067 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 904 | cld | 1068 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
| 905 | movl $1,%ebx | 1069 | CFI_ADJUST_CFA_OFFSET 8 |
| 906 | movl $MSR_GS_BASE,%ecx | 1070 | subq $15*8, %rsp |
| 907 | rdmsr | 1071 | call save_paranoid |
| 908 | testl %edx,%edx | ||
| 909 | js 1f | ||
| 910 | SWAPGS | ||
| 911 | xorl %ebx,%ebx | ||
| 912 | 1: | ||
| 913 | .if \ist | ||
| 914 | movq %gs:pda_data_offset, %rbp | ||
| 915 | .endif | ||
| 916 | .if \irqtrace | ||
| 917 | TRACE_IRQS_OFF | ||
| 918 | .endif | ||
| 919 | movq %rsp,%rdi | ||
| 920 | movq ORIG_RAX(%rsp),%rsi | ||
| 921 | movq $-1,ORIG_RAX(%rsp) | ||
| 922 | .if \ist | ||
| 923 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
| 924 | .endif | ||
| 925 | call \sym | ||
| 926 | .if \ist | ||
| 927 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
| 928 | .endif | ||
| 929 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 930 | .if \irqtrace | ||
| 931 | TRACE_IRQS_OFF | 1072 | TRACE_IRQS_OFF |
| 932 | .endif | 1073 | movq %rsp,%rdi /* pt_regs pointer */ |
| 933 | .endm | 1074 | xorl %esi,%esi /* no error code */ |
| 1075 | movq %gs:pda_data_offset, %rbp | ||
| 1076 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
| 1077 | call \do_sym | ||
| 1078 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
| 1079 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
| 1080 | CFI_ENDPROC | ||
| 1081 | END(\sym) | ||
| 1082 | .endm | ||
| 934 | 1083 | ||
| 935 | /* | 1084 | .macro errorentry sym do_sym |
| 936 | * "Paranoid" exit path from exception stack. | 1085 | ENTRY(\sym) |
| 937 | * Paranoid because this is used by NMIs and cannot take | 1086 | XCPT_FRAME |
| 938 | * any kernel state for granted. | 1087 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 939 | * We don't do kernel preemption checks here, because only | 1088 | subq $15*8,%rsp |
| 940 | * NMI should be common and it does not enable IRQs and | 1089 | CFI_ADJUST_CFA_OFFSET 15*8 |
| 941 | * cannot get reschedule ticks. | 1090 | call error_entry |
| 942 | * | 1091 | DEFAULT_FRAME 0 |
| 943 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1092 | movq %rsp,%rdi /* pt_regs pointer */ |
| 944 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1093 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
| 945 | * hard flags at once, atomically) | 1094 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
| 946 | */ | 1095 | call \do_sym |
| 947 | .macro paranoidexit trace=1 | 1096 | jmp error_exit /* %ebx: no swapgs flag */ |
| 948 | /* ebx: no swapgs flag */ | ||
| 949 | paranoid_exit\trace: | ||
| 950 | testl %ebx,%ebx /* swapgs needed? */ | ||
| 951 | jnz paranoid_restore\trace | ||
| 952 | testl $3,CS(%rsp) | ||
| 953 | jnz paranoid_userspace\trace | ||
| 954 | paranoid_swapgs\trace: | ||
| 955 | .if \trace | ||
| 956 | TRACE_IRQS_IRETQ 0 | ||
| 957 | .endif | ||
| 958 | SWAPGS_UNSAFE_STACK | ||
| 959 | paranoid_restore\trace: | ||
| 960 | RESTORE_ALL 8 | ||
| 961 | jmp irq_return | ||
| 962 | paranoid_userspace\trace: | ||
| 963 | GET_THREAD_INFO(%rcx) | ||
| 964 | movl TI_flags(%rcx),%ebx | ||
| 965 | andl $_TIF_WORK_MASK,%ebx | ||
| 966 | jz paranoid_swapgs\trace | ||
| 967 | movq %rsp,%rdi /* &pt_regs */ | ||
| 968 | call sync_regs | ||
| 969 | movq %rax,%rsp /* switch stack for scheduling */ | ||
| 970 | testl $_TIF_NEED_RESCHED,%ebx | ||
| 971 | jnz paranoid_schedule\trace | ||
| 972 | movl %ebx,%edx /* arg3: thread flags */ | ||
| 973 | .if \trace | ||
| 974 | TRACE_IRQS_ON | ||
| 975 | .endif | ||
| 976 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
| 977 | xorl %esi,%esi /* arg2: oldset */ | ||
| 978 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
| 979 | call do_notify_resume | ||
| 980 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 981 | .if \trace | ||
| 982 | TRACE_IRQS_OFF | ||
| 983 | .endif | ||
| 984 | jmp paranoid_userspace\trace | ||
| 985 | paranoid_schedule\trace: | ||
| 986 | .if \trace | ||
| 987 | TRACE_IRQS_ON | ||
| 988 | .endif | ||
| 989 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
| 990 | call schedule | ||
| 991 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
| 992 | .if \trace | ||
| 993 | TRACE_IRQS_OFF | ||
| 994 | .endif | ||
| 995 | jmp paranoid_userspace\trace | ||
| 996 | CFI_ENDPROC | 1097 | CFI_ENDPROC |
| 997 | .endm | 1098 | END(\sym) |
| 1099 | .endm | ||
| 998 | 1100 | ||
| 999 | /* | 1101 | /* error code is on the stack already */ |
| 1000 | * Exception entry point. This expects an error code/orig_rax on the stack | 1102 | .macro paranoiderrorentry sym do_sym |
| 1001 | * and the exception handler in %rax. | 1103 | ENTRY(\sym) |
| 1002 | */ | 1104 | XCPT_FRAME |
| 1003 | KPROBE_ENTRY(error_entry) | 1105 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1004 | _frame RDI | 1106 | subq $15*8,%rsp |
| 1005 | CFI_REL_OFFSET rax,0 | 1107 | CFI_ADJUST_CFA_OFFSET 15*8 |
| 1006 | /* rdi slot contains rax, oldrax contains error code */ | 1108 | call save_paranoid |
| 1007 | cld | 1109 | DEFAULT_FRAME 0 |
| 1008 | subq $14*8,%rsp | ||
| 1009 | CFI_ADJUST_CFA_OFFSET (14*8) | ||
| 1010 | movq %rsi,13*8(%rsp) | ||
| 1011 | CFI_REL_OFFSET rsi,RSI | ||
| 1012 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | ||
| 1013 | CFI_REGISTER rax,rsi | ||
| 1014 | movq %rdx,12*8(%rsp) | ||
| 1015 | CFI_REL_OFFSET rdx,RDX | ||
| 1016 | movq %rcx,11*8(%rsp) | ||
| 1017 | CFI_REL_OFFSET rcx,RCX | ||
| 1018 | movq %rsi,10*8(%rsp) /* store rax */ | ||
| 1019 | CFI_REL_OFFSET rax,RAX | ||
| 1020 | movq %r8, 9*8(%rsp) | ||
| 1021 | CFI_REL_OFFSET r8,R8 | ||
| 1022 | movq %r9, 8*8(%rsp) | ||
| 1023 | CFI_REL_OFFSET r9,R9 | ||
| 1024 | movq %r10,7*8(%rsp) | ||
| 1025 | CFI_REL_OFFSET r10,R10 | ||
| 1026 | movq %r11,6*8(%rsp) | ||
| 1027 | CFI_REL_OFFSET r11,R11 | ||
| 1028 | movq %rbx,5*8(%rsp) | ||
| 1029 | CFI_REL_OFFSET rbx,RBX | ||
| 1030 | movq %rbp,4*8(%rsp) | ||
| 1031 | CFI_REL_OFFSET rbp,RBP | ||
| 1032 | movq %r12,3*8(%rsp) | ||
| 1033 | CFI_REL_OFFSET r12,R12 | ||
| 1034 | movq %r13,2*8(%rsp) | ||
| 1035 | CFI_REL_OFFSET r13,R13 | ||
| 1036 | movq %r14,1*8(%rsp) | ||
| 1037 | CFI_REL_OFFSET r14,R14 | ||
| 1038 | movq %r15,(%rsp) | ||
| 1039 | CFI_REL_OFFSET r15,R15 | ||
| 1040 | xorl %ebx,%ebx | ||
| 1041 | testl $3,CS(%rsp) | ||
| 1042 | je error_kernelspace | ||
| 1043 | error_swapgs: | ||
| 1044 | SWAPGS | ||
| 1045 | error_sti: | ||
| 1046 | TRACE_IRQS_OFF | ||
| 1047 | movq %rdi,RDI(%rsp) | ||
| 1048 | CFI_REL_OFFSET rdi,RDI | ||
| 1049 | movq %rsp,%rdi | ||
| 1050 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
| 1051 | movq $-1,ORIG_RAX(%rsp) | ||
| 1052 | call *%rax | ||
| 1053 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
| 1054 | error_exit: | ||
| 1055 | movl %ebx,%eax | ||
| 1056 | RESTORE_REST | ||
| 1057 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 1058 | TRACE_IRQS_OFF | 1110 | TRACE_IRQS_OFF |
| 1059 | GET_THREAD_INFO(%rcx) | 1111 | movq %rsp,%rdi /* pt_regs pointer */ |
| 1060 | testl %eax,%eax | 1112 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
| 1061 | jne retint_kernel | 1113 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
| 1062 | LOCKDEP_SYS_EXIT_IRQ | 1114 | call \do_sym |
| 1063 | movl TI_flags(%rcx),%edx | 1115 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
| 1064 | movl $_TIF_WORK_MASK,%edi | ||
| 1065 | andl %edi,%edx | ||
| 1066 | jnz retint_careful | ||
| 1067 | jmp retint_swapgs | ||
| 1068 | CFI_ENDPROC | 1116 | CFI_ENDPROC |
| 1117 | END(\sym) | ||
| 1118 | .endm | ||
| 1069 | 1119 | ||
| 1070 | error_kernelspace: | 1120 | zeroentry divide_error do_divide_error |
| 1071 | incl %ebx | 1121 | zeroentry overflow do_overflow |
| 1072 | /* There are two places in the kernel that can potentially fault with | 1122 | zeroentry bounds do_bounds |
| 1073 | usergs. Handle them here. The exception handlers after | 1123 | zeroentry invalid_op do_invalid_op |
| 1074 | iret run with kernel gs again, so don't set the user space flag. | 1124 | zeroentry device_not_available do_device_not_available |
| 1075 | B stepping K8s sometimes report an truncated RIP for IRET | 1125 | paranoiderrorentry double_fault do_double_fault |
| 1076 | exceptions returning to compat mode. Check for these here too. */ | 1126 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun |
| 1077 | leaq irq_return(%rip),%rcx | 1127 | errorentry invalid_TSS do_invalid_TSS |
| 1078 | cmpq %rcx,RIP(%rsp) | 1128 | errorentry segment_not_present do_segment_not_present |
| 1079 | je error_swapgs | 1129 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug |
| 1080 | movl %ecx,%ecx /* zero extend */ | 1130 | zeroentry coprocessor_error do_coprocessor_error |
| 1081 | cmpq %rcx,RIP(%rsp) | 1131 | errorentry alignment_check do_alignment_check |
| 1082 | je error_swapgs | 1132 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
| 1083 | cmpq $gs_change,RIP(%rsp) | 1133 | |
| 1084 | je error_swapgs | 1134 | /* Reload gs selector with exception handling */ |
| 1085 | jmp error_sti | 1135 | /* edi: new selector */ |
| 1086 | KPROBE_END(error_entry) | ||
| 1087 | |||
| 1088 | /* Reload gs selector with exception handling */ | ||
| 1089 | /* edi: new selector */ | ||
| 1090 | ENTRY(native_load_gs_index) | 1136 | ENTRY(native_load_gs_index) |
| 1091 | CFI_STARTPROC | 1137 | CFI_STARTPROC |
| 1092 | pushf | 1138 | pushf |
| 1093 | CFI_ADJUST_CFA_OFFSET 8 | 1139 | CFI_ADJUST_CFA_OFFSET 8 |
| 1094 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) | 1140 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
| 1095 | SWAPGS | 1141 | SWAPGS |
| 1096 | gs_change: | 1142 | gs_change: |
| 1097 | movl %edi,%gs | 1143 | movl %edi,%gs |
| 1098 | 2: mfence /* workaround */ | 1144 | 2: mfence /* workaround */ |
| 1099 | SWAPGS | 1145 | SWAPGS |
| 1100 | popf | 1146 | popf |
| 1101 | CFI_ADJUST_CFA_OFFSET -8 | 1147 | CFI_ADJUST_CFA_OFFSET -8 |
| 1102 | ret | 1148 | ret |
| 1103 | CFI_ENDPROC | 1149 | CFI_ENDPROC |
| 1104 | ENDPROC(native_load_gs_index) | 1150 | END(native_load_gs_index) |
| 1105 | 1151 | ||
| 1106 | .section __ex_table,"a" | 1152 | .section __ex_table,"a" |
| 1107 | .align 8 | 1153 | .align 8 |
| 1108 | .quad gs_change,bad_gs | 1154 | .quad gs_change,bad_gs |
| 1109 | .previous | 1155 | .previous |
| 1110 | .section .fixup,"ax" | 1156 | .section .fixup,"ax" |
| 1111 | /* running with kernelgs */ | 1157 | /* running with kernelgs */ |
| 1112 | bad_gs: | 1158 | bad_gs: |
| 1113 | SWAPGS /* switch back to user gs */ | 1159 | SWAPGS /* switch back to user gs */ |
| 1114 | xorl %eax,%eax | 1160 | xorl %eax,%eax |
| 1115 | movl %eax,%gs | 1161 | movl %eax,%gs |
| 1116 | jmp 2b | 1162 | jmp 2b |
| 1117 | .previous | 1163 | .previous |
| 1118 | 1164 | ||
| 1119 | /* | 1165 | /* |
| 1120 | * Create a kernel thread. | 1166 | * Create a kernel thread. |
| 1121 | * | 1167 | * |
| @@ -1138,7 +1184,7 @@ ENTRY(kernel_thread) | |||
| 1138 | 1184 | ||
| 1139 | xorl %r8d,%r8d | 1185 | xorl %r8d,%r8d |
| 1140 | xorl %r9d,%r9d | 1186 | xorl %r9d,%r9d |
| 1141 | 1187 | ||
| 1142 | # clone now | 1188 | # clone now |
| 1143 | call do_fork | 1189 | call do_fork |
| 1144 | movq %rax,RAX(%rsp) | 1190 | movq %rax,RAX(%rsp) |
| @@ -1149,15 +1195,15 @@ ENTRY(kernel_thread) | |||
| 1149 | * so internally to the x86_64 port you can rely on kernel_thread() | 1195 | * so internally to the x86_64 port you can rely on kernel_thread() |
| 1150 | * not to reschedule the child before returning, this avoids the need | 1196 | * not to reschedule the child before returning, this avoids the need |
| 1151 | * of hacks for example to fork off the per-CPU idle tasks. | 1197 | * of hacks for example to fork off the per-CPU idle tasks. |
| 1152 | * [Hopefully no generic code relies on the reschedule -AK] | 1198 | * [Hopefully no generic code relies on the reschedule -AK] |
| 1153 | */ | 1199 | */ |
| 1154 | RESTORE_ALL | 1200 | RESTORE_ALL |
| 1155 | UNFAKE_STACK_FRAME | 1201 | UNFAKE_STACK_FRAME |
| 1156 | ret | 1202 | ret |
| 1157 | CFI_ENDPROC | 1203 | CFI_ENDPROC |
| 1158 | ENDPROC(kernel_thread) | 1204 | END(kernel_thread) |
| 1159 | 1205 | ||
| 1160 | child_rip: | 1206 | ENTRY(child_rip) |
| 1161 | pushq $0 # fake return address | 1207 | pushq $0 # fake return address |
| 1162 | CFI_STARTPROC | 1208 | CFI_STARTPROC |
| 1163 | /* | 1209 | /* |
| @@ -1170,8 +1216,9 @@ child_rip: | |||
| 1170 | # exit | 1216 | # exit |
| 1171 | mov %eax, %edi | 1217 | mov %eax, %edi |
| 1172 | call do_exit | 1218 | call do_exit |
| 1219 | ud2 # padding for call trace | ||
| 1173 | CFI_ENDPROC | 1220 | CFI_ENDPROC |
| 1174 | ENDPROC(child_rip) | 1221 | END(child_rip) |
| 1175 | 1222 | ||
| 1176 | /* | 1223 | /* |
| 1177 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1224 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
| @@ -1191,10 +1238,10 @@ ENDPROC(child_rip) | |||
| 1191 | ENTRY(kernel_execve) | 1238 | ENTRY(kernel_execve) |
| 1192 | CFI_STARTPROC | 1239 | CFI_STARTPROC |
| 1193 | FAKE_STACK_FRAME $0 | 1240 | FAKE_STACK_FRAME $0 |
| 1194 | SAVE_ALL | 1241 | SAVE_ALL |
| 1195 | movq %rsp,%rcx | 1242 | movq %rsp,%rcx |
| 1196 | call sys_execve | 1243 | call sys_execve |
| 1197 | movq %rax, RAX(%rsp) | 1244 | movq %rax, RAX(%rsp) |
| 1198 | RESTORE_REST | 1245 | RESTORE_REST |
| 1199 | testq %rax,%rax | 1246 | testq %rax,%rax |
| 1200 | je int_ret_from_sys_call | 1247 | je int_ret_from_sys_call |
| @@ -1202,129 +1249,7 @@ ENTRY(kernel_execve) | |||
| 1202 | UNFAKE_STACK_FRAME | 1249 | UNFAKE_STACK_FRAME |
| 1203 | ret | 1250 | ret |
| 1204 | CFI_ENDPROC | 1251 | CFI_ENDPROC |
| 1205 | ENDPROC(kernel_execve) | 1252 | END(kernel_execve) |
| 1206 | |||
| 1207 | KPROBE_ENTRY(page_fault) | ||
| 1208 | errorentry do_page_fault | ||
| 1209 | KPROBE_END(page_fault) | ||
| 1210 | |||
| 1211 | ENTRY(coprocessor_error) | ||
| 1212 | zeroentry do_coprocessor_error | ||
| 1213 | END(coprocessor_error) | ||
| 1214 | |||
| 1215 | ENTRY(simd_coprocessor_error) | ||
| 1216 | zeroentry do_simd_coprocessor_error | ||
| 1217 | END(simd_coprocessor_error) | ||
| 1218 | |||
| 1219 | ENTRY(device_not_available) | ||
| 1220 | zeroentry do_device_not_available | ||
| 1221 | END(device_not_available) | ||
| 1222 | |||
| 1223 | /* runs on exception stack */ | ||
| 1224 | KPROBE_ENTRY(debug) | ||
| 1225 | INTR_FRAME | ||
| 1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1227 | pushq $0 | ||
| 1228 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1229 | paranoidentry do_debug, DEBUG_STACK | ||
| 1230 | paranoidexit | ||
| 1231 | KPROBE_END(debug) | ||
| 1232 | |||
| 1233 | /* runs on exception stack */ | ||
| 1234 | KPROBE_ENTRY(nmi) | ||
| 1235 | INTR_FRAME | ||
| 1236 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1237 | pushq $-1 | ||
| 1238 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1239 | paranoidentry do_nmi, 0, 0 | ||
| 1240 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 1241 | paranoidexit 0 | ||
| 1242 | #else | ||
| 1243 | jmp paranoid_exit1 | ||
| 1244 | CFI_ENDPROC | ||
| 1245 | #endif | ||
| 1246 | KPROBE_END(nmi) | ||
| 1247 | |||
| 1248 | KPROBE_ENTRY(int3) | ||
| 1249 | INTR_FRAME | ||
| 1250 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1251 | pushq $0 | ||
| 1252 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1253 | paranoidentry do_int3, DEBUG_STACK | ||
| 1254 | jmp paranoid_exit1 | ||
| 1255 | CFI_ENDPROC | ||
| 1256 | KPROBE_END(int3) | ||
| 1257 | |||
| 1258 | ENTRY(overflow) | ||
| 1259 | zeroentry do_overflow | ||
| 1260 | END(overflow) | ||
| 1261 | |||
| 1262 | ENTRY(bounds) | ||
| 1263 | zeroentry do_bounds | ||
| 1264 | END(bounds) | ||
| 1265 | |||
| 1266 | ENTRY(invalid_op) | ||
| 1267 | zeroentry do_invalid_op | ||
| 1268 | END(invalid_op) | ||
| 1269 | |||
| 1270 | ENTRY(coprocessor_segment_overrun) | ||
| 1271 | zeroentry do_coprocessor_segment_overrun | ||
| 1272 | END(coprocessor_segment_overrun) | ||
| 1273 | |||
| 1274 | /* runs on exception stack */ | ||
| 1275 | ENTRY(double_fault) | ||
| 1276 | XCPT_FRAME | ||
| 1277 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1278 | paranoidentry do_double_fault | ||
| 1279 | jmp paranoid_exit1 | ||
| 1280 | CFI_ENDPROC | ||
| 1281 | END(double_fault) | ||
| 1282 | |||
| 1283 | ENTRY(invalid_TSS) | ||
| 1284 | errorentry do_invalid_TSS | ||
| 1285 | END(invalid_TSS) | ||
| 1286 | |||
| 1287 | ENTRY(segment_not_present) | ||
| 1288 | errorentry do_segment_not_present | ||
| 1289 | END(segment_not_present) | ||
| 1290 | |||
| 1291 | /* runs on exception stack */ | ||
| 1292 | ENTRY(stack_segment) | ||
| 1293 | XCPT_FRAME | ||
| 1294 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1295 | paranoidentry do_stack_segment | ||
| 1296 | jmp paranoid_exit1 | ||
| 1297 | CFI_ENDPROC | ||
| 1298 | END(stack_segment) | ||
| 1299 | |||
| 1300 | KPROBE_ENTRY(general_protection) | ||
| 1301 | errorentry do_general_protection | ||
| 1302 | KPROBE_END(general_protection) | ||
| 1303 | |||
| 1304 | ENTRY(alignment_check) | ||
| 1305 | errorentry do_alignment_check | ||
| 1306 | END(alignment_check) | ||
| 1307 | |||
| 1308 | ENTRY(divide_error) | ||
| 1309 | zeroentry do_divide_error | ||
| 1310 | END(divide_error) | ||
| 1311 | |||
| 1312 | ENTRY(spurious_interrupt_bug) | ||
| 1313 | zeroentry do_spurious_interrupt_bug | ||
| 1314 | END(spurious_interrupt_bug) | ||
| 1315 | |||
| 1316 | #ifdef CONFIG_X86_MCE | ||
| 1317 | /* runs on exception stack */ | ||
| 1318 | ENTRY(machine_check) | ||
| 1319 | INTR_FRAME | ||
| 1320 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1321 | pushq $0 | ||
| 1322 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1323 | paranoidentry do_machine_check | ||
| 1324 | jmp paranoid_exit1 | ||
| 1325 | CFI_ENDPROC | ||
| 1326 | END(machine_check) | ||
| 1327 | #endif | ||
| 1328 | 1253 | ||
| 1329 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1254 | /* Call softirq on interrupt stack. Interrupts are off. */ |
| 1330 | ENTRY(call_softirq) | 1255 | ENTRY(call_softirq) |
| @@ -1344,40 +1269,33 @@ ENTRY(call_softirq) | |||
| 1344 | decl %gs:pda_irqcount | 1269 | decl %gs:pda_irqcount |
| 1345 | ret | 1270 | ret |
| 1346 | CFI_ENDPROC | 1271 | CFI_ENDPROC |
| 1347 | ENDPROC(call_softirq) | 1272 | END(call_softirq) |
| 1348 | |||
| 1349 | KPROBE_ENTRY(ignore_sysret) | ||
| 1350 | CFI_STARTPROC | ||
| 1351 | mov $-ENOSYS,%eax | ||
| 1352 | sysret | ||
| 1353 | CFI_ENDPROC | ||
| 1354 | ENDPROC(ignore_sysret) | ||
| 1355 | 1273 | ||
| 1356 | #ifdef CONFIG_XEN | 1274 | #ifdef CONFIG_XEN |
| 1357 | ENTRY(xen_hypervisor_callback) | 1275 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
| 1358 | zeroentry xen_do_hypervisor_callback | ||
| 1359 | END(xen_hypervisor_callback) | ||
| 1360 | 1276 | ||
| 1361 | /* | 1277 | /* |
| 1362 | # A note on the "critical region" in our callback handler. | 1278 | * A note on the "critical region" in our callback handler. |
| 1363 | # We want to avoid stacking callback handlers due to events occurring | 1279 | * We want to avoid stacking callback handlers due to events occurring |
| 1364 | # during handling of the last event. To do this, we keep events disabled | 1280 | * during handling of the last event. To do this, we keep events disabled |
| 1365 | # until we've done all processing. HOWEVER, we must enable events before | 1281 | * until we've done all processing. HOWEVER, we must enable events before |
| 1366 | # popping the stack frame (can't be done atomically) and so it would still | 1282 | * popping the stack frame (can't be done atomically) and so it would still |
| 1367 | # be possible to get enough handler activations to overflow the stack. | 1283 | * be possible to get enough handler activations to overflow the stack. |
| 1368 | # Although unlikely, bugs of that kind are hard to track down, so we'd | 1284 | * Although unlikely, bugs of that kind are hard to track down, so we'd |
| 1369 | # like to avoid the possibility. | 1285 | * like to avoid the possibility. |
| 1370 | # So, on entry to the handler we detect whether we interrupted an | 1286 | * So, on entry to the handler we detect whether we interrupted an |
| 1371 | # existing activation in its critical region -- if so, we pop the current | 1287 | * existing activation in its critical region -- if so, we pop the current |
| 1372 | # activation and restart the handler using the previous one. | 1288 | * activation and restart the handler using the previous one. |
| 1373 | */ | 1289 | */ |
| 1374 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | 1290 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) |
| 1375 | CFI_STARTPROC | 1291 | CFI_STARTPROC |
| 1376 | /* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will | 1292 | /* |
| 1377 | see the correct pointer to the pt_regs */ | 1293 | * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will |
| 1294 | * see the correct pointer to the pt_regs | ||
| 1295 | */ | ||
| 1378 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1296 | movq %rdi, %rsp # we don't return, adjust the stack frame |
| 1379 | CFI_ENDPROC | 1297 | CFI_ENDPROC |
| 1380 | CFI_DEFAULT_STACK | 1298 | DEFAULT_FRAME |
| 1381 | 11: incl %gs:pda_irqcount | 1299 | 11: incl %gs:pda_irqcount |
| 1382 | movq %rsp,%rbp | 1300 | movq %rsp,%rbp |
| 1383 | CFI_DEF_CFA_REGISTER rbp | 1301 | CFI_DEF_CFA_REGISTER rbp |
| @@ -1392,23 +1310,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | |||
| 1392 | END(do_hypervisor_callback) | 1310 | END(do_hypervisor_callback) |
| 1393 | 1311 | ||
| 1394 | /* | 1312 | /* |
| 1395 | # Hypervisor uses this for application faults while it executes. | 1313 | * Hypervisor uses this for application faults while it executes. |
| 1396 | # We get here for two reasons: | 1314 | * We get here for two reasons: |
| 1397 | # 1. Fault while reloading DS, ES, FS or GS | 1315 | * 1. Fault while reloading DS, ES, FS or GS |
| 1398 | # 2. Fault while executing IRET | 1316 | * 2. Fault while executing IRET |
| 1399 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | 1317 | * Category 1 we do not need to fix up as Xen has already reloaded all segment |
| 1400 | # registers that could be reloaded and zeroed the others. | 1318 | * registers that could be reloaded and zeroed the others. |
| 1401 | # Category 2 we fix up by killing the current process. We cannot use the | 1319 | * Category 2 we fix up by killing the current process. We cannot use the |
| 1402 | # normal Linux return path in this case because if we use the IRET hypercall | 1320 | * normal Linux return path in this case because if we use the IRET hypercall |
| 1403 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1321 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
| 1404 | # We distinguish between categories by comparing each saved segment register | 1322 | * We distinguish between categories by comparing each saved segment register |
| 1405 | # with its current contents: any discrepancy means we in category 1. | 1323 | * with its current contents: any discrepancy means we in category 1. |
| 1406 | */ | 1324 | */ |
| 1407 | ENTRY(xen_failsafe_callback) | 1325 | ENTRY(xen_failsafe_callback) |
| 1408 | framesz = (RIP-0x30) /* workaround buggy gas */ | 1326 | INTR_FRAME 1 (6*8) |
| 1409 | _frame framesz | 1327 | /*CFI_REL_OFFSET gs,GS*/ |
| 1410 | CFI_REL_OFFSET rcx, 0 | 1328 | /*CFI_REL_OFFSET fs,FS*/ |
| 1411 | CFI_REL_OFFSET r11, 8 | 1329 | /*CFI_REL_OFFSET es,ES*/ |
| 1330 | /*CFI_REL_OFFSET ds,DS*/ | ||
| 1331 | CFI_REL_OFFSET r11,8 | ||
| 1332 | CFI_REL_OFFSET rcx,0 | ||
| 1412 | movw %ds,%cx | 1333 | movw %ds,%cx |
| 1413 | cmpw %cx,0x10(%rsp) | 1334 | cmpw %cx,0x10(%rsp) |
| 1414 | CFI_REMEMBER_STATE | 1335 | CFI_REMEMBER_STATE |
| @@ -1429,12 +1350,9 @@ ENTRY(xen_failsafe_callback) | |||
| 1429 | CFI_RESTORE r11 | 1350 | CFI_RESTORE r11 |
| 1430 | addq $0x30,%rsp | 1351 | addq $0x30,%rsp |
| 1431 | CFI_ADJUST_CFA_OFFSET -0x30 | 1352 | CFI_ADJUST_CFA_OFFSET -0x30 |
| 1432 | pushq $0 | 1353 | pushq_cfi $0 /* RIP */ |
| 1433 | CFI_ADJUST_CFA_OFFSET 8 | 1354 | pushq_cfi %r11 |
| 1434 | pushq %r11 | 1355 | pushq_cfi %rcx |
| 1435 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1436 | pushq %rcx | ||
| 1437 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1438 | jmp general_protection | 1356 | jmp general_protection |
| 1439 | CFI_RESTORE_STATE | 1357 | CFI_RESTORE_STATE |
| 1440 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1358 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
| @@ -1444,11 +1362,223 @@ ENTRY(xen_failsafe_callback) | |||
| 1444 | CFI_RESTORE r11 | 1362 | CFI_RESTORE r11 |
| 1445 | addq $0x30,%rsp | 1363 | addq $0x30,%rsp |
| 1446 | CFI_ADJUST_CFA_OFFSET -0x30 | 1364 | CFI_ADJUST_CFA_OFFSET -0x30 |
| 1447 | pushq $0 | 1365 | pushq_cfi $0 |
| 1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1449 | SAVE_ALL | 1366 | SAVE_ALL |
| 1450 | jmp error_exit | 1367 | jmp error_exit |
| 1451 | CFI_ENDPROC | 1368 | CFI_ENDPROC |
| 1452 | END(xen_failsafe_callback) | 1369 | END(xen_failsafe_callback) |
| 1453 | 1370 | ||
| 1454 | #endif /* CONFIG_XEN */ | 1371 | #endif /* CONFIG_XEN */ |
| 1372 | |||
| 1373 | /* | ||
| 1374 | * Some functions should be protected against kprobes | ||
| 1375 | */ | ||
| 1376 | .pushsection .kprobes.text, "ax" | ||
| 1377 | |||
| 1378 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | ||
| 1379 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | ||
| 1380 | paranoiderrorentry stack_segment do_stack_segment | ||
| 1381 | errorentry general_protection do_general_protection | ||
| 1382 | errorentry page_fault do_page_fault | ||
| 1383 | #ifdef CONFIG_X86_MCE | ||
| 1384 | paranoidzeroentry machine_check do_machine_check | ||
| 1385 | #endif | ||
| 1386 | |||
| 1387 | /* | ||
| 1388 | * "Paranoid" exit path from exception stack. | ||
| 1389 | * Paranoid because this is used by NMIs and cannot take | ||
| 1390 | * any kernel state for granted. | ||
| 1391 | * We don't do kernel preemption checks here, because only | ||
| 1392 | * NMI should be common and it does not enable IRQs and | ||
| 1393 | * cannot get reschedule ticks. | ||
| 1394 | * | ||
| 1395 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
| 1396 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
| 1397 | * hard flags at once, atomically) | ||
| 1398 | */ | ||
| 1399 | |||
| 1400 | /* ebx: no swapgs flag */ | ||
| 1401 | ENTRY(paranoid_exit) | ||
| 1402 | INTR_FRAME | ||
| 1403 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 1404 | TRACE_IRQS_OFF | ||
| 1405 | testl %ebx,%ebx /* swapgs needed? */ | ||
| 1406 | jnz paranoid_restore | ||
| 1407 | testl $3,CS(%rsp) | ||
| 1408 | jnz paranoid_userspace | ||
| 1409 | paranoid_swapgs: | ||
| 1410 | TRACE_IRQS_IRETQ 0 | ||
| 1411 | SWAPGS_UNSAFE_STACK | ||
| 1412 | paranoid_restore: | ||
| 1413 | RESTORE_ALL 8 | ||
| 1414 | jmp irq_return | ||
| 1415 | paranoid_userspace: | ||
| 1416 | GET_THREAD_INFO(%rcx) | ||
| 1417 | movl TI_flags(%rcx),%ebx | ||
| 1418 | andl $_TIF_WORK_MASK,%ebx | ||
| 1419 | jz paranoid_swapgs | ||
| 1420 | movq %rsp,%rdi /* &pt_regs */ | ||
| 1421 | call sync_regs | ||
| 1422 | movq %rax,%rsp /* switch stack for scheduling */ | ||
| 1423 | testl $_TIF_NEED_RESCHED,%ebx | ||
| 1424 | jnz paranoid_schedule | ||
| 1425 | movl %ebx,%edx /* arg3: thread flags */ | ||
| 1426 | TRACE_IRQS_ON | ||
| 1427 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
| 1428 | xorl %esi,%esi /* arg2: oldset */ | ||
| 1429 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
| 1430 | call do_notify_resume | ||
| 1431 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 1432 | TRACE_IRQS_OFF | ||
| 1433 | jmp paranoid_userspace | ||
| 1434 | paranoid_schedule: | ||
| 1435 | TRACE_IRQS_ON | ||
| 1436 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
| 1437 | call schedule | ||
| 1438 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
| 1439 | TRACE_IRQS_OFF | ||
| 1440 | jmp paranoid_userspace | ||
| 1441 | CFI_ENDPROC | ||
| 1442 | END(paranoid_exit) | ||
| 1443 | |||
| 1444 | /* | ||
| 1445 | * Exception entry point. This expects an error code/orig_rax on the stack. | ||
| 1446 | * returns in "no swapgs flag" in %ebx. | ||
| 1447 | */ | ||
| 1448 | ENTRY(error_entry) | ||
| 1449 | XCPT_FRAME | ||
| 1450 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
| 1451 | /* oldrax contains error code */ | ||
| 1452 | cld | ||
| 1453 | movq_cfi rdi, RDI+8 | ||
| 1454 | movq_cfi rsi, RSI+8 | ||
| 1455 | movq_cfi rdx, RDX+8 | ||
| 1456 | movq_cfi rcx, RCX+8 | ||
| 1457 | movq_cfi rax, RAX+8 | ||
| 1458 | movq_cfi r8, R8+8 | ||
| 1459 | movq_cfi r9, R9+8 | ||
| 1460 | movq_cfi r10, R10+8 | ||
| 1461 | movq_cfi r11, R11+8 | ||
| 1462 | movq_cfi rbx, RBX+8 | ||
| 1463 | movq_cfi rbp, RBP+8 | ||
| 1464 | movq_cfi r12, R12+8 | ||
| 1465 | movq_cfi r13, R13+8 | ||
| 1466 | movq_cfi r14, R14+8 | ||
| 1467 | movq_cfi r15, R15+8 | ||
| 1468 | xorl %ebx,%ebx | ||
| 1469 | testl $3,CS+8(%rsp) | ||
| 1470 | je error_kernelspace | ||
| 1471 | error_swapgs: | ||
| 1472 | SWAPGS | ||
| 1473 | error_sti: | ||
| 1474 | TRACE_IRQS_OFF | ||
| 1475 | ret | ||
| 1476 | CFI_ENDPROC | ||
| 1477 | |||
| 1478 | /* | ||
| 1479 | * There are two places in the kernel that can potentially fault with | ||
| 1480 | * usergs. Handle them here. The exception handlers after iret run with | ||
| 1481 | * kernel gs again, so don't set the user space flag. B stepping K8s | ||
| 1482 | * sometimes report a truncated RIP for IRET exceptions returning to | ||
| 1483 | * compat mode. Check for these here too. | ||
| 1484 | */ | ||
| 1485 | error_kernelspace: | ||
| 1486 | incl %ebx | ||
| 1487 | leaq irq_return(%rip),%rcx | ||
| 1488 | cmpq %rcx,RIP+8(%rsp) | ||
| 1489 | je error_swapgs | ||
| 1490 | movl %ecx,%ecx /* zero extend */ | ||
| 1491 | cmpq %rcx,RIP+8(%rsp) | ||
| 1492 | je error_swapgs | ||
| 1493 | cmpq $gs_change,RIP+8(%rsp) | ||
| 1494 | je error_swapgs | ||
| 1495 | jmp error_sti | ||
| 1496 | END(error_entry) | ||
| 1497 | |||
| 1498 | |||
| 1499 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
| 1500 | ENTRY(error_exit) | ||
| 1501 | DEFAULT_FRAME | ||
| 1502 | movl %ebx,%eax | ||
| 1503 | RESTORE_REST | ||
| 1504 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 1505 | TRACE_IRQS_OFF | ||
| 1506 | GET_THREAD_INFO(%rcx) | ||
| 1507 | testl %eax,%eax | ||
| 1508 | jne retint_kernel | ||
| 1509 | LOCKDEP_SYS_EXIT_IRQ | ||
| 1510 | movl TI_flags(%rcx),%edx | ||
| 1511 | movl $_TIF_WORK_MASK,%edi | ||
| 1512 | andl %edi,%edx | ||
| 1513 | jnz retint_careful | ||
| 1514 | jmp retint_swapgs | ||
| 1515 | CFI_ENDPROC | ||
| 1516 | END(error_exit) | ||
| 1517 | |||
| 1518 | |||
| 1519 | /* runs on exception stack */ | ||
| 1520 | ENTRY(nmi) | ||
| 1521 | INTR_FRAME | ||
| 1522 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1523 | pushq_cfi $-1 | ||
| 1524 | subq $15*8, %rsp | ||
| 1525 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
| 1526 | call save_paranoid | ||
| 1527 | DEFAULT_FRAME 0 | ||
| 1528 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | ||
| 1529 | movq %rsp,%rdi | ||
| 1530 | movq $-1,%rsi | ||
| 1531 | call do_nmi | ||
| 1532 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 1533 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
| 1534 | /* ebx: no swapgs flag */ | ||
| 1535 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 1536 | testl %ebx,%ebx /* swapgs needed? */ | ||
| 1537 | jnz nmi_restore | ||
| 1538 | testl $3,CS(%rsp) | ||
| 1539 | jnz nmi_userspace | ||
| 1540 | nmi_swapgs: | ||
| 1541 | SWAPGS_UNSAFE_STACK | ||
| 1542 | nmi_restore: | ||
| 1543 | RESTORE_ALL 8 | ||
| 1544 | jmp irq_return | ||
| 1545 | nmi_userspace: | ||
| 1546 | GET_THREAD_INFO(%rcx) | ||
| 1547 | movl TI_flags(%rcx),%ebx | ||
| 1548 | andl $_TIF_WORK_MASK,%ebx | ||
| 1549 | jz nmi_swapgs | ||
| 1550 | movq %rsp,%rdi /* &pt_regs */ | ||
| 1551 | call sync_regs | ||
| 1552 | movq %rax,%rsp /* switch stack for scheduling */ | ||
| 1553 | testl $_TIF_NEED_RESCHED,%ebx | ||
| 1554 | jnz nmi_schedule | ||
| 1555 | movl %ebx,%edx /* arg3: thread flags */ | ||
| 1556 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
| 1557 | xorl %esi,%esi /* arg2: oldset */ | ||
| 1558 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
| 1559 | call do_notify_resume | ||
| 1560 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
| 1561 | jmp nmi_userspace | ||
| 1562 | nmi_schedule: | ||
| 1563 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
| 1564 | call schedule | ||
| 1565 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
| 1566 | jmp nmi_userspace | ||
| 1567 | CFI_ENDPROC | ||
| 1568 | #else | ||
| 1569 | jmp paranoid_exit | ||
| 1570 | CFI_ENDPROC | ||
| 1571 | #endif | ||
| 1572 | END(nmi) | ||
| 1573 | |||
| 1574 | ENTRY(ignore_sysret) | ||
| 1575 | CFI_STARTPROC | ||
| 1576 | mov $-ENOSYS,%eax | ||
| 1577 | sysret | ||
| 1578 | CFI_ENDPROC | ||
| 1579 | END(ignore_sysret) | ||
| 1580 | |||
| 1581 | /* | ||
| 1582 | * End of kprobes section | ||
| 1583 | */ | ||
| 1584 | .popsection | ||
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 0aa2c443d600..53699c931ad4 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c | |||
| @@ -38,8 +38,11 @@ | |||
| 38 | #include <asm/io.h> | 38 | #include <asm/io.h> |
| 39 | #include <asm/nmi.h> | 39 | #include <asm/nmi.h> |
| 40 | #include <asm/smp.h> | 40 | #include <asm/smp.h> |
| 41 | #include <asm/atomic.h> | ||
| 41 | #include <asm/apicdef.h> | 42 | #include <asm/apicdef.h> |
| 42 | #include <mach_mpparse.h> | 43 | #include <mach_mpparse.h> |
| 44 | #include <asm/genapic.h> | ||
| 45 | #include <asm/setup.h> | ||
| 43 | 46 | ||
| 44 | /* | 47 | /* |
| 45 | * ES7000 chipsets | 48 | * ES7000 chipsets |
| @@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi) | |||
| 161 | return gsi; | 164 | return gsi; |
| 162 | } | 165 | } |
| 163 | 166 | ||
| 167 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
| 168 | { | ||
| 169 | unsigned long vect = 0, psaival = 0; | ||
| 170 | |||
| 171 | if (psai == NULL) | ||
| 172 | return -1; | ||
| 173 | |||
| 174 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
| 175 | psaival = (0x1000000 | vect | cpu); | ||
| 176 | |||
| 177 | while (*psai & 0x1000000) | ||
| 178 | ; | ||
| 179 | |||
| 180 | *psai = psaival; | ||
| 181 | |||
| 182 | return 0; | ||
| 183 | } | ||
| 184 | |||
| 185 | static void noop_wait_for_deassert(atomic_t *deassert_not_used) | ||
| 186 | { | ||
| 187 | } | ||
| 188 | |||
| 189 | static int __init es7000_update_genapic(void) | ||
| 190 | { | ||
| 191 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
| 192 | |||
| 193 | /* MPENTIUMIII */ | ||
| 194 | if (boot_cpu_data.x86 == 6 && | ||
| 195 | (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { | ||
| 196 | es7000_update_genapic_to_cluster(); | ||
| 197 | genapic->wait_for_init_deassert = noop_wait_for_deassert; | ||
| 198 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
| 199 | } | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 164 | void __init | 204 | void __init |
| 165 | setup_unisys(void) | 205 | setup_unisys(void) |
| 166 | { | 206 | { |
| @@ -176,6 +216,8 @@ setup_unisys(void) | |||
| 176 | else | 216 | else |
| 177 | es7000_plat = ES7000_CLASSIC; | 217 | es7000_plat = ES7000_CLASSIC; |
| 178 | ioapic_renumber_irq = es7000_rename_gsi; | 218 | ioapic_renumber_irq = es7000_rename_gsi; |
| 219 | |||
| 220 | x86_quirks->update_genapic = es7000_update_genapic; | ||
| 179 | } | 221 | } |
| 180 | 222 | ||
| 181 | /* | 223 | /* |
| @@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg) | |||
| 317 | return status; | 359 | return status; |
| 318 | } | 360 | } |
| 319 | 361 | ||
| 320 | int | ||
| 321 | es7000_start_cpu(int cpu, unsigned long eip) | ||
| 322 | { | ||
| 323 | unsigned long vect = 0, psaival = 0; | ||
| 324 | |||
| 325 | if (psai == NULL) | ||
| 326 | return -1; | ||
| 327 | |||
| 328 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
| 329 | psaival = (0x1000000 | vect | cpu); | ||
| 330 | |||
| 331 | while (*psai & 0x1000000) | ||
| 332 | ; | ||
| 333 | |||
| 334 | *psai = psaival; | ||
| 335 | |||
| 336 | return 0; | ||
| 337 | |||
| 338 | } | ||
| 339 | |||
| 340 | void __init | 362 | void __init |
| 341 | es7000_sw_apic(void) | 363 | es7000_sw_apic(void) |
| 342 | { | 364 | { |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..1b43086b097a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -14,14 +14,17 @@ | |||
| 14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
| 15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
| 16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
| 17 | #include <linux/sched.h> | ||
| 17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
| 18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
| 19 | 20 | ||
| 20 | #include <asm/ftrace.h> | 21 | #include <asm/ftrace.h> |
| 22 | #include <linux/ftrace.h> | ||
| 21 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
| 24 | #include <asm/nmi.h> | ||
| 22 | 25 | ||
| 23 | 26 | ||
| 24 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | 27 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 25 | 28 | ||
| 26 | union ftrace_code_union { | 29 | union ftrace_code_union { |
| 27 | char code[MCOUNT_INSN_SIZE]; | 30 | char code[MCOUNT_INSN_SIZE]; |
| @@ -31,18 +34,12 @@ union ftrace_code_union { | |||
| 31 | } __attribute__((packed)); | 34 | } __attribute__((packed)); |
| 32 | }; | 35 | }; |
| 33 | 36 | ||
| 34 | |||
| 35 | static int ftrace_calc_offset(long ip, long addr) | 37 | static int ftrace_calc_offset(long ip, long addr) |
| 36 | { | 38 | { |
| 37 | return (int)(addr - ip); | 39 | return (int)(addr - ip); |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 40 | unsigned char *ftrace_nop_replace(void) | 42 | static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) |
| 41 | { | ||
| 42 | return ftrace_nop; | ||
| 43 | } | ||
| 44 | |||
| 45 | unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | ||
| 46 | { | 43 | { |
| 47 | static union ftrace_code_union calc; | 44 | static union ftrace_code_union calc; |
| 48 | 45 | ||
| @@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | |||
| 56 | return calc.code; | 53 | return calc.code; |
| 57 | } | 54 | } |
| 58 | 55 | ||
| 59 | int | 56 | /* |
| 57 | * Modifying code must take extra care. On an SMP machine, if | ||
| 58 | * the code being modified is also being executed on another CPU | ||
| 59 | * that CPU will have undefined results and possibly take a GPF. | ||
| 60 | * We use kstop_machine to stop other CPUs from executing code. | ||
| 61 | * But this does not stop NMIs from happening. We still need | ||
| 62 | * to protect against that. We separate out the modification of | ||
| 63 | * the code to take care of this. | ||
| 64 | * | ||
| 65 | * Two buffers are added: An IP buffer and a "code" buffer. | ||
| 66 | * | ||
| 67 | * 1) Put the instruction pointer into the IP buffer | ||
| 68 | * and the new code into the "code" buffer. | ||
| 69 | * 2) Set a flag that says we are modifying code | ||
| 70 | * 3) Wait for any running NMIs to finish. | ||
| 71 | * 4) Write the code | ||
| 72 | * 5) clear the flag. | ||
| 73 | * 6) Wait for any running NMIs to finish. | ||
| 74 | * | ||
| 75 | * If an NMI is executed, the first thing it does is to call | ||
| 76 | * "ftrace_nmi_enter". This will check if the flag is set to write | ||
| 77 | * and if it is, it will write what is in the IP and "code" buffers. | ||
| 78 | * | ||
| 79 | * The trick is, it does not matter if everyone is writing the same | ||
| 80 | * content to the code location. Also, if a CPU is executing code | ||
| 81 | * it is OK to write to that code location if the contents being written | ||
| 82 | * are the same as what exists. | ||
| 83 | */ | ||
| 84 | |||
| 85 | static atomic_t in_nmi = ATOMIC_INIT(0); | ||
| 86 | static int mod_code_status; /* holds return value of text write */ | ||
| 87 | static int mod_code_write; /* set when NMI should do the write */ | ||
| 88 | static void *mod_code_ip; /* holds the IP to write to */ | ||
| 89 | static void *mod_code_newcode; /* holds the text to write to the IP */ | ||
| 90 | |||
| 91 | static unsigned nmi_wait_count; | ||
| 92 | static atomic_t nmi_update_count = ATOMIC_INIT(0); | ||
| 93 | |||
| 94 | int ftrace_arch_read_dyn_info(char *buf, int size) | ||
| 95 | { | ||
| 96 | int r; | ||
| 97 | |||
| 98 | r = snprintf(buf, size, "%u %u", | ||
| 99 | nmi_wait_count, | ||
| 100 | atomic_read(&nmi_update_count)); | ||
| 101 | return r; | ||
| 102 | } | ||
| 103 | |||
| 104 | static void ftrace_mod_code(void) | ||
| 105 | { | ||
| 106 | /* | ||
| 107 | * Yes, more than one CPU process can be writing to mod_code_status. | ||
| 108 | * (and the code itself) | ||
| 109 | * But if one were to fail, then they all should, and if one were | ||
| 110 | * to succeed, then they all should. | ||
| 111 | */ | ||
| 112 | mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, | ||
| 113 | MCOUNT_INSN_SIZE); | ||
| 114 | } | ||
| 115 | |||
| 116 | void ftrace_nmi_enter(void) | ||
| 117 | { | ||
| 118 | atomic_inc(&in_nmi); | ||
| 119 | /* Must have in_nmi seen before reading write flag */ | ||
| 120 | smp_mb(); | ||
| 121 | if (mod_code_write) { | ||
| 122 | ftrace_mod_code(); | ||
| 123 | atomic_inc(&nmi_update_count); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | void ftrace_nmi_exit(void) | ||
| 128 | { | ||
| 129 | /* Finish all executions before clearing in_nmi */ | ||
| 130 | smp_wmb(); | ||
| 131 | atomic_dec(&in_nmi); | ||
| 132 | } | ||
| 133 | |||
| 134 | static void wait_for_nmi(void) | ||
| 135 | { | ||
| 136 | int waited = 0; | ||
| 137 | |||
| 138 | while (atomic_read(&in_nmi)) { | ||
| 139 | waited = 1; | ||
| 140 | cpu_relax(); | ||
| 141 | } | ||
| 142 | |||
| 143 | if (waited) | ||
| 144 | nmi_wait_count++; | ||
| 145 | } | ||
| 146 | |||
| 147 | static int | ||
| 148 | do_ftrace_mod_code(unsigned long ip, void *new_code) | ||
| 149 | { | ||
| 150 | mod_code_ip = (void *)ip; | ||
| 151 | mod_code_newcode = new_code; | ||
| 152 | |||
| 153 | /* The buffers need to be visible before we let NMIs write them */ | ||
| 154 | smp_wmb(); | ||
| 155 | |||
| 156 | mod_code_write = 1; | ||
| 157 | |||
| 158 | /* Make sure write bit is visible before we wait on NMIs */ | ||
| 159 | smp_mb(); | ||
| 160 | |||
| 161 | wait_for_nmi(); | ||
| 162 | |||
| 163 | /* Make sure all running NMIs have finished before we write the code */ | ||
| 164 | smp_mb(); | ||
| 165 | |||
| 166 | ftrace_mod_code(); | ||
| 167 | |||
| 168 | /* Make sure the write happens before clearing the bit */ | ||
| 169 | smp_wmb(); | ||
| 170 | |||
| 171 | mod_code_write = 0; | ||
| 172 | |||
| 173 | /* make sure NMIs see the cleared bit */ | ||
| 174 | smp_mb(); | ||
| 175 | |||
| 176 | wait_for_nmi(); | ||
| 177 | |||
| 178 | return mod_code_status; | ||
| 179 | } | ||
| 180 | |||
| 181 | |||
| 182 | |||
| 183 | |||
| 184 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | ||
| 185 | |||
| 186 | static unsigned char *ftrace_nop_replace(void) | ||
| 187 | { | ||
| 188 | return ftrace_nop; | ||
| 189 | } | ||
| 190 | |||
| 191 | static int | ||
| 60 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, | 192 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, |
| 61 | unsigned char *new_code) | 193 | unsigned char *new_code) |
| 62 | { | 194 | { |
| @@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
| 81 | return -EINVAL; | 213 | return -EINVAL; |
| 82 | 214 | ||
| 83 | /* replace the text with the new text */ | 215 | /* replace the text with the new text */ |
| 84 | if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) | 216 | if (do_ftrace_mod_code(ip, new_code)) |
| 85 | return -EPERM; | 217 | return -EPERM; |
| 86 | 218 | ||
| 87 | sync_core(); | 219 | sync_core(); |
| @@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
| 89 | return 0; | 221 | return 0; |
| 90 | } | 222 | } |
| 91 | 223 | ||
| 224 | int ftrace_make_nop(struct module *mod, | ||
| 225 | struct dyn_ftrace *rec, unsigned long addr) | ||
| 226 | { | ||
| 227 | unsigned char *new, *old; | ||
| 228 | unsigned long ip = rec->ip; | ||
| 229 | |||
| 230 | old = ftrace_call_replace(ip, addr); | ||
| 231 | new = ftrace_nop_replace(); | ||
| 232 | |||
| 233 | return ftrace_modify_code(rec->ip, old, new); | ||
| 234 | } | ||
| 235 | |||
| 236 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | ||
| 237 | { | ||
| 238 | unsigned char *new, *old; | ||
| 239 | unsigned long ip = rec->ip; | ||
| 240 | |||
| 241 | old = ftrace_nop_replace(); | ||
| 242 | new = ftrace_call_replace(ip, addr); | ||
| 243 | |||
| 244 | return ftrace_modify_code(rec->ip, old, new); | ||
| 245 | } | ||
| 246 | |||
| 92 | int ftrace_update_ftrace_func(ftrace_func_t func) | 247 | int ftrace_update_ftrace_func(ftrace_func_t func) |
| 93 | { | 248 | { |
| 94 | unsigned long ip = (unsigned long)(&ftrace_call); | 249 | unsigned long ip = (unsigned long)(&ftrace_call); |
| @@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data) | |||
| 165 | 320 | ||
| 166 | return 0; | 321 | return 0; |
| 167 | } | 322 | } |
| 323 | #endif | ||
| 324 | |||
| 325 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 326 | |||
| 327 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
| 328 | extern void ftrace_graph_call(void); | ||
| 329 | |||
| 330 | static int ftrace_mod_jmp(unsigned long ip, | ||
| 331 | int old_offset, int new_offset) | ||
| 332 | { | ||
| 333 | unsigned char code[MCOUNT_INSN_SIZE]; | ||
| 334 | |||
| 335 | if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) | ||
| 336 | return -EFAULT; | ||
| 337 | |||
| 338 | if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) | ||
| 339 | return -EINVAL; | ||
| 340 | |||
| 341 | *(int *)(&code[1]) = new_offset; | ||
| 342 | |||
| 343 | if (do_ftrace_mod_code(ip, &code)) | ||
| 344 | return -EPERM; | ||
| 345 | |||
| 346 | return 0; | ||
| 347 | } | ||
| 348 | |||
| 349 | int ftrace_enable_ftrace_graph_caller(void) | ||
| 350 | { | ||
| 351 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
| 352 | int old_offset, new_offset; | ||
| 353 | |||
| 354 | old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
| 355 | new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
| 356 | |||
| 357 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
| 358 | } | ||
| 359 | |||
| 360 | int ftrace_disable_ftrace_graph_caller(void) | ||
| 361 | { | ||
| 362 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
| 363 | int old_offset, new_offset; | ||
| 364 | |||
| 365 | old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
| 366 | new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
| 367 | |||
| 368 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
| 369 | } | ||
| 370 | |||
| 371 | #else /* CONFIG_DYNAMIC_FTRACE */ | ||
| 372 | |||
| 373 | /* | ||
| 374 | * These functions are picked from those used on | ||
| 375 | * this page for dynamic ftrace. They have been | ||
| 376 | * simplified to ignore all traces in NMI context. | ||
| 377 | */ | ||
| 378 | static atomic_t in_nmi; | ||
| 379 | |||
| 380 | void ftrace_nmi_enter(void) | ||
| 381 | { | ||
| 382 | atomic_inc(&in_nmi); | ||
| 383 | } | ||
| 384 | |||
| 385 | void ftrace_nmi_exit(void) | ||
| 386 | { | ||
| 387 | atomic_dec(&in_nmi); | ||
| 388 | } | ||
| 389 | |||
| 390 | #endif /* !CONFIG_DYNAMIC_FTRACE */ | ||
| 391 | |||
| 392 | /* Add a function return address to the trace stack on thread info.*/ | ||
| 393 | static int push_return_trace(unsigned long ret, unsigned long long time, | ||
| 394 | unsigned long func, int *depth) | ||
| 395 | { | ||
| 396 | int index; | ||
| 397 | |||
| 398 | if (!current->ret_stack) | ||
| 399 | return -EBUSY; | ||
| 400 | |||
| 401 | /* The return trace stack is full */ | ||
| 402 | if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { | ||
| 403 | atomic_inc(¤t->trace_overrun); | ||
| 404 | return -EBUSY; | ||
| 405 | } | ||
| 406 | |||
| 407 | index = ++current->curr_ret_stack; | ||
| 408 | barrier(); | ||
| 409 | current->ret_stack[index].ret = ret; | ||
| 410 | current->ret_stack[index].func = func; | ||
| 411 | current->ret_stack[index].calltime = time; | ||
| 412 | *depth = index; | ||
| 413 | |||
| 414 | return 0; | ||
| 415 | } | ||
| 416 | |||
| 417 | /* Retrieve a function return address to the trace stack on thread info.*/ | ||
| 418 | static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | ||
| 419 | { | ||
| 420 | int index; | ||
| 421 | |||
| 422 | index = current->curr_ret_stack; | ||
| 423 | |||
| 424 | if (unlikely(index < 0)) { | ||
| 425 | ftrace_graph_stop(); | ||
| 426 | WARN_ON(1); | ||
| 427 | /* Might as well panic, otherwise we have no where to go */ | ||
| 428 | *ret = (unsigned long)panic; | ||
| 429 | return; | ||
| 430 | } | ||
| 431 | |||
| 432 | *ret = current->ret_stack[index].ret; | ||
| 433 | trace->func = current->ret_stack[index].func; | ||
| 434 | trace->calltime = current->ret_stack[index].calltime; | ||
| 435 | trace->overrun = atomic_read(¤t->trace_overrun); | ||
| 436 | trace->depth = index; | ||
| 437 | barrier(); | ||
| 438 | current->curr_ret_stack--; | ||
| 439 | |||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | * Send the trace to the ring-buffer. | ||
| 444 | * @return the original return address. | ||
| 445 | */ | ||
| 446 | unsigned long ftrace_return_to_handler(void) | ||
| 447 | { | ||
| 448 | struct ftrace_graph_ret trace; | ||
| 449 | unsigned long ret; | ||
| 450 | |||
| 451 | pop_return_trace(&trace, &ret); | ||
| 452 | trace.rettime = cpu_clock(raw_smp_processor_id()); | ||
| 453 | ftrace_graph_return(&trace); | ||
| 454 | |||
| 455 | if (unlikely(!ret)) { | ||
| 456 | ftrace_graph_stop(); | ||
| 457 | WARN_ON(1); | ||
| 458 | /* Might as well panic. What else to do? */ | ||
| 459 | ret = (unsigned long)panic; | ||
| 460 | } | ||
| 461 | |||
| 462 | return ret; | ||
| 463 | } | ||
| 464 | |||
| 465 | /* | ||
| 466 | * Hook the return address and push it in the stack of return addrs | ||
| 467 | * in current thread info. | ||
| 468 | */ | ||
| 469 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | ||
| 470 | { | ||
| 471 | unsigned long old; | ||
| 472 | unsigned long long calltime; | ||
| 473 | int faulted; | ||
| 474 | struct ftrace_graph_ent trace; | ||
| 475 | unsigned long return_hooker = (unsigned long) | ||
| 476 | &return_to_handler; | ||
| 477 | |||
| 478 | /* Nmi's are currently unsupported */ | ||
| 479 | if (unlikely(atomic_read(&in_nmi))) | ||
| 480 | return; | ||
| 481 | |||
| 482 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) | ||
| 483 | return; | ||
| 484 | |||
| 485 | /* | ||
| 486 | * Protect against fault, even if it shouldn't | ||
| 487 | * happen. This tool is too much intrusive to | ||
| 488 | * ignore such a protection. | ||
| 489 | */ | ||
| 490 | asm volatile( | ||
| 491 | "1: " _ASM_MOV " (%[parent_old]), %[old]\n" | ||
| 492 | "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" | ||
| 493 | " movl $0, %[faulted]\n" | ||
| 494 | |||
| 495 | ".section .fixup, \"ax\"\n" | ||
| 496 | "3: movl $1, %[faulted]\n" | ||
| 497 | ".previous\n" | ||
| 498 | |||
| 499 | _ASM_EXTABLE(1b, 3b) | ||
| 500 | _ASM_EXTABLE(2b, 3b) | ||
| 501 | |||
| 502 | : [parent_replaced] "=r" (parent), [old] "=r" (old), | ||
| 503 | [faulted] "=r" (faulted) | ||
| 504 | : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) | ||
| 505 | : "memory" | ||
| 506 | ); | ||
| 507 | |||
| 508 | if (unlikely(faulted)) { | ||
| 509 | ftrace_graph_stop(); | ||
| 510 | WARN_ON(1); | ||
| 511 | return; | ||
| 512 | } | ||
| 513 | |||
| 514 | if (unlikely(!__kernel_text_address(old))) { | ||
| 515 | ftrace_graph_stop(); | ||
| 516 | *parent = old; | ||
| 517 | WARN_ON(1); | ||
| 518 | return; | ||
| 519 | } | ||
| 520 | |||
| 521 | calltime = cpu_clock(raw_smp_processor_id()); | ||
| 522 | |||
| 523 | if (push_return_trace(old, calltime, | ||
| 524 | self_addr, &trace.depth) == -EBUSY) { | ||
| 525 | *parent = old; | ||
| 526 | return; | ||
| 527 | } | ||
| 528 | |||
| 529 | trace.func = self_addr; | ||
| 530 | |||
| 531 | /* Only trace if the calling function expects to */ | ||
| 532 | if (!ftrace_graph_entry(&trace)) { | ||
| 533 | current->curr_ret_stack--; | ||
| 534 | *parent = old; | ||
| 535 | } | ||
| 536 | } | ||
| 537 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6c9bfc9e1e95..2bced78b0b8e 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
| 22 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
| 23 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
| 24 | #include <asm/setup.h> | ||
| 24 | 25 | ||
| 25 | extern struct genapic apic_flat; | 26 | extern struct genapic apic_flat; |
| 26 | extern struct genapic apic_physflat; | 27 | extern struct genapic apic_physflat; |
| @@ -53,6 +54,9 @@ void __init setup_apic_routing(void) | |||
| 53 | genapic = &apic_physflat; | 54 | genapic = &apic_physflat; |
| 54 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 55 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); |
| 55 | } | 56 | } |
| 57 | |||
| 58 | if (x86_quirks->update_genapic) | ||
| 59 | x86_quirks->update_genapic(); | ||
| 56 | } | 60 | } |
| 57 | 61 | ||
| 58 | /* Same for both flat and physical. */ | 62 | /* Same for both flat and physical. */ |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2c7dbdb98278..dece17289731 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
| 12 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
| 13 | #include <linux/cpu.h> | ||
| 13 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
| 14 | #include <linux/string.h> | 15 | #include <linux/string.h> |
| 15 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
| @@ -17,6 +18,9 @@ | |||
| 17 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
| 19 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
| 21 | #include <linux/timer.h> | ||
| 22 | #include <linux/proc_fs.h> | ||
| 23 | #include <asm/current.h> | ||
| 20 | #include <asm/smp.h> | 24 | #include <asm/smp.h> |
| 21 | #include <asm/ipi.h> | 25 | #include <asm/ipi.h> |
| 22 | #include <asm/genapic.h> | 26 | #include <asm/genapic.h> |
| @@ -356,6 +360,103 @@ static __init void uv_rtc_init(void) | |||
| 356 | } | 360 | } |
| 357 | 361 | ||
| 358 | /* | 362 | /* |
| 363 | * percpu heartbeat timer | ||
| 364 | */ | ||
| 365 | static void uv_heartbeat(unsigned long ignored) | ||
| 366 | { | ||
| 367 | struct timer_list *timer = &uv_hub_info->scir.timer; | ||
| 368 | unsigned char bits = uv_hub_info->scir.state; | ||
| 369 | |||
| 370 | /* flip heartbeat bit */ | ||
| 371 | bits ^= SCIR_CPU_HEARTBEAT; | ||
| 372 | |||
| 373 | /* is this cpu idle? */ | ||
| 374 | if (idle_cpu(raw_smp_processor_id())) | ||
| 375 | bits &= ~SCIR_CPU_ACTIVITY; | ||
| 376 | else | ||
| 377 | bits |= SCIR_CPU_ACTIVITY; | ||
| 378 | |||
| 379 | /* update system controller interface reg */ | ||
| 380 | uv_set_scir_bits(bits); | ||
| 381 | |||
| 382 | /* enable next timer period */ | ||
| 383 | mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); | ||
| 384 | } | ||
| 385 | |||
| 386 | static void __cpuinit uv_heartbeat_enable(int cpu) | ||
| 387 | { | ||
| 388 | if (!uv_cpu_hub_info(cpu)->scir.enabled) { | ||
| 389 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; | ||
| 390 | |||
| 391 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | ||
| 392 | setup_timer(timer, uv_heartbeat, cpu); | ||
| 393 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | ||
| 394 | add_timer_on(timer, cpu); | ||
| 395 | uv_cpu_hub_info(cpu)->scir.enabled = 1; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* check boot cpu */ | ||
| 399 | if (!uv_cpu_hub_info(0)->scir.enabled) | ||
| 400 | uv_heartbeat_enable(0); | ||
| 401 | } | ||
| 402 | |||
| 403 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 404 | static void __cpuinit uv_heartbeat_disable(int cpu) | ||
| 405 | { | ||
| 406 | if (uv_cpu_hub_info(cpu)->scir.enabled) { | ||
| 407 | uv_cpu_hub_info(cpu)->scir.enabled = 0; | ||
| 408 | del_timer(&uv_cpu_hub_info(cpu)->scir.timer); | ||
| 409 | } | ||
| 410 | uv_set_cpu_scir_bits(cpu, 0xff); | ||
| 411 | } | ||
| 412 | |||
| 413 | /* | ||
| 414 | * cpu hotplug notifier | ||
| 415 | */ | ||
| 416 | static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, | ||
| 417 | unsigned long action, void *hcpu) | ||
| 418 | { | ||
| 419 | long cpu = (long)hcpu; | ||
| 420 | |||
| 421 | switch (action) { | ||
| 422 | case CPU_ONLINE: | ||
| 423 | uv_heartbeat_enable(cpu); | ||
| 424 | break; | ||
| 425 | case CPU_DOWN_PREPARE: | ||
| 426 | uv_heartbeat_disable(cpu); | ||
| 427 | break; | ||
| 428 | default: | ||
| 429 | break; | ||
| 430 | } | ||
| 431 | return NOTIFY_OK; | ||
| 432 | } | ||
| 433 | |||
| 434 | static __init void uv_scir_register_cpu_notifier(void) | ||
| 435 | { | ||
| 436 | hotcpu_notifier(uv_scir_cpu_notify, 0); | ||
| 437 | } | ||
| 438 | |||
| 439 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
| 440 | |||
| 441 | static __init void uv_scir_register_cpu_notifier(void) | ||
| 442 | { | ||
| 443 | } | ||
| 444 | |||
| 445 | static __init int uv_init_heartbeat(void) | ||
| 446 | { | ||
| 447 | int cpu; | ||
| 448 | |||
| 449 | if (is_uv_system()) | ||
| 450 | for_each_online_cpu(cpu) | ||
| 451 | uv_heartbeat_enable(cpu); | ||
| 452 | return 0; | ||
| 453 | } | ||
| 454 | |||
| 455 | late_initcall(uv_init_heartbeat); | ||
| 456 | |||
| 457 | #endif /* !CONFIG_HOTPLUG_CPU */ | ||
| 458 | |||
| 459 | /* | ||
| 359 | * Called on each cpu to initialize the per_cpu UV data area. | 460 | * Called on each cpu to initialize the per_cpu UV data area. |
| 360 | * ZZZ hotplug not supported yet | 461 | * ZZZ hotplug not supported yet |
| 361 | */ | 462 | */ |
| @@ -428,7 +529,7 @@ void __init uv_system_init(void) | |||
| 428 | 529 | ||
| 429 | uv_bios_init(); | 530 | uv_bios_init(); |
| 430 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 531 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, |
| 431 | &uv_coherency_id, &uv_region_size); | 532 | &sn_coherency_id, &sn_region_size); |
| 432 | uv_rtc_init(); | 533 | uv_rtc_init(); |
| 433 | 534 | ||
| 434 | for_each_present_cpu(cpu) { | 535 | for_each_present_cpu(cpu) { |
| @@ -439,8 +540,7 @@ void __init uv_system_init(void) | |||
| 439 | uv_blade_info[blade].nr_possible_cpus++; | 540 | uv_blade_info[blade].nr_possible_cpus++; |
| 440 | 541 | ||
| 441 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 542 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
| 442 | uv_cpu_hub_info(cpu)->lowmem_remap_top = | 543 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
| 443 | lowmem_redir_base + lowmem_redir_size; | ||
| 444 | uv_cpu_hub_info(cpu)->m_val = m_val; | 544 | uv_cpu_hub_info(cpu)->m_val = m_val; |
| 445 | uv_cpu_hub_info(cpu)->n_val = m_val; | 545 | uv_cpu_hub_info(cpu)->n_val = m_val; |
| 446 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 546 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
| @@ -450,7 +550,8 @@ void __init uv_system_init(void) | |||
| 450 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 550 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; |
| 451 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 551 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
| 452 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 552 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
| 453 | uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; | 553 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
| 554 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | ||
| 454 | uv_node_to_blade[nid] = blade; | 555 | uv_node_to_blade[nid] = blade; |
| 455 | uv_cpu_to_blade[cpu] = blade; | 556 | uv_cpu_to_blade[cpu] = blade; |
| 456 | max_pnode = max(pnode, max_pnode); | 557 | max_pnode = max(pnode, max_pnode); |
| @@ -467,4 +568,6 @@ void __init uv_system_init(void) | |||
| 467 | map_mmioh_high(max_pnode); | 568 | map_mmioh_high(max_pnode); |
| 468 | 569 | ||
| 469 | uv_cpu_init(); | 570 | uv_cpu_init(); |
| 571 | uv_scir_register_cpu_notifier(); | ||
| 572 | proc_mkdir("sgi_uv", NULL); | ||
| 470 | } | 573 | } |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 1dcb0f13897e..3e66bd364a9d 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
| @@ -35,7 +35,6 @@ void __init reserve_ebda_region(void) | |||
| 35 | 35 | ||
| 36 | /* start of EBDA area */ | 36 | /* start of EBDA area */ |
| 37 | ebda_addr = get_bios_ebda(); | 37 | ebda_addr = get_bios_ebda(); |
| 38 | printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); | ||
| 39 | 38 | ||
| 40 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 39 | /* Fixup: bios puts an EBDA in the top 64K segment */ |
| 41 | /* of conventional memory, but does not adjust lowmem. */ | 40 | /* of conventional memory, but does not adjust lowmem. */ |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index fa1d25dd83e3..ac108d1fe182 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
| @@ -12,9 +12,12 @@ | |||
| 12 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
| 13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
| 14 | #include <asm/bios_ebda.h> | 14 | #include <asm/bios_ebda.h> |
| 15 | #include <asm/trampoline.h> | ||
| 15 | 16 | ||
| 16 | void __init i386_start_kernel(void) | 17 | void __init i386_start_kernel(void) |
| 17 | { | 18 | { |
| 19 | reserve_trampoline_memory(); | ||
| 20 | |||
| 18 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 21 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
| 19 | 22 | ||
| 20 | #ifdef CONFIG_BLK_DEV_INITRD | 23 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d16084f90649..388e05a5fc17 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <asm/kdebug.h> | 24 | #include <asm/kdebug.h> |
| 25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
| 26 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
| 27 | #include <asm/trampoline.h> | ||
| 27 | 28 | ||
| 28 | /* boot cpu pda */ | 29 | /* boot cpu pda */ |
| 29 | static struct x8664_pda _boot_cpu_pda __read_mostly; | 30 | static struct x8664_pda _boot_cpu_pda __read_mostly; |
| @@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
| 120 | { | 121 | { |
| 121 | copy_bootdata(__va(real_mode_data)); | 122 | copy_bootdata(__va(real_mode_data)); |
| 122 | 123 | ||
| 124 | reserve_trampoline_memory(); | ||
| 125 | |||
| 123 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 126 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
| 124 | 127 | ||
| 125 | #ifdef CONFIG_BLK_DEV_INITRD | 128 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 067d8de913f6..3f0a3edf0a57 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -33,7 +33,9 @@ | |||
| 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
| 34 | */ | 34 | */ |
| 35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
| 36 | unsigned long hpet_num_timers; | 36 | #ifdef CONFIG_PCI_MSI |
| 37 | static unsigned long hpet_num_timers; | ||
| 38 | #endif | ||
| 37 | static void __iomem *hpet_virt_address; | 39 | static void __iomem *hpet_virt_address; |
| 38 | 40 | ||
| 39 | struct hpet_dev { | 41 | struct hpet_dev { |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4120c1..d39918076bb4 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
| @@ -14,7 +14,6 @@ static struct fs_struct init_fs = INIT_FS; | |||
| 14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
| 15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
| 16 | struct mm_struct init_mm = INIT_MM(init_mm); | 16 | struct mm_struct init_mm = INIT_MM(init_mm); |
| 17 | EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ | ||
| 18 | 17 | ||
| 19 | /* | 18 | /* |
| 20 | * Initial thread structure. | 19 | * Initial thread structure. |
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9043251210fb..679e7bbbbcd6 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c | |||
| @@ -2216,10 +2216,9 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
| 2216 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2216 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
| 2217 | { | 2217 | { |
| 2218 | unsigned vector, me; | 2218 | unsigned vector, me; |
| 2219 | |||
| 2219 | ack_APIC_irq(); | 2220 | ack_APIC_irq(); |
| 2220 | #ifdef CONFIG_X86_64 | ||
| 2221 | exit_idle(); | 2221 | exit_idle(); |
| 2222 | #endif | ||
| 2223 | irq_enter(); | 2222 | irq_enter(); |
| 2224 | 2223 | ||
| 2225 | me = smp_processor_id(); | 2224 | me = smp_processor_id(); |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..1df869e5bd0b 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
| @@ -13,12 +13,12 @@ | |||
| 13 | #include <linux/seq_file.h> | 13 | #include <linux/seq_file.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/delay.h> | 15 | #include <linux/delay.h> |
| 16 | #include <linux/ftrace.h> | ||
| 16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
| 17 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
| 18 | #include <asm/idle.h> | 19 | #include <asm/idle.h> |
| 19 | #include <asm/smp.h> | 20 | #include <asm/smp.h> |
| 20 | 21 | ||
| 21 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
| 22 | /* | 22 | /* |
| 23 | * Probabilistic stack overflow check: | 23 | * Probabilistic stack overflow check: |
| 24 | * | 24 | * |
| @@ -28,26 +28,25 @@ | |||
| 28 | */ | 28 | */ |
| 29 | static inline void stack_overflow_check(struct pt_regs *regs) | 29 | static inline void stack_overflow_check(struct pt_regs *regs) |
| 30 | { | 30 | { |
| 31 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
| 31 | u64 curbase = (u64)task_stack_page(current); | 32 | u64 curbase = (u64)task_stack_page(current); |
| 32 | static unsigned long warned = -60*HZ; | 33 | |
| 33 | 34 | WARN_ONCE(regs->sp >= curbase && | |
| 34 | if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && | 35 | regs->sp <= curbase + THREAD_SIZE && |
| 35 | regs->sp < curbase + sizeof(struct thread_info) + 128 && | 36 | regs->sp < curbase + sizeof(struct thread_info) + |
| 36 | time_after(jiffies, warned + 60*HZ)) { | 37 | sizeof(struct pt_regs) + 128, |
| 37 | printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 38 | |
| 38 | current->comm, curbase, regs->sp); | 39 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", |
| 39 | show_stack(NULL,NULL); | 40 | current->comm, curbase, regs->sp); |
| 40 | warned = jiffies; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | #endif | 41 | #endif |
| 42 | } | ||
| 44 | 43 | ||
| 45 | /* | 44 | /* |
| 46 | * do_IRQ handles all normal device IRQ's (the special | 45 | * do_IRQ handles all normal device IRQ's (the special |
| 47 | * SMP cross-CPU interrupts have their own specific | 46 | * SMP cross-CPU interrupts have their own specific |
| 48 | * handlers). | 47 | * handlers). |
| 49 | */ | 48 | */ |
| 50 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | 49 | asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) |
| 51 | { | 50 | { |
| 52 | struct pt_regs *old_regs = set_irq_regs(regs); | 51 | struct pt_regs *old_regs = set_irq_regs(regs); |
| 53 | struct irq_desc *desc; | 52 | struct irq_desc *desc; |
| @@ -60,9 +59,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
| 60 | irq_enter(); | 59 | irq_enter(); |
| 61 | irq = __get_cpu_var(vector_irq)[vector]; | 60 | irq = __get_cpu_var(vector_irq)[vector]; |
| 62 | 61 | ||
| 63 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
| 64 | stack_overflow_check(regs); | 62 | stack_overflow_check(regs); |
| 65 | #endif | ||
| 66 | 63 | ||
| 67 | desc = irq_to_desc(irq); | 64 | desc = irq_to_desc(irq); |
| 68 | if (likely(desc)) | 65 | if (likely(desc)) |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e80..607db63044a5 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
| @@ -129,7 +129,7 @@ void __init native_init_IRQ(void) | |||
| 129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
| 130 | /* SYSCALL_VECTOR was reserved in trap_init. */ | 130 | /* SYSCALL_VECTOR was reserved in trap_init. */ |
| 131 | if (i != SYSCALL_VECTOR) | 131 | if (i != SYSCALL_VECTOR) |
| 132 | set_intr_gate(i, interrupt[i]); | 132 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | 135 | ||
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff0235391285..8670b3ce626e 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
| @@ -24,41 +24,6 @@ | |||
| 24 | #include <asm/i8259.h> | 24 | #include <asm/i8259.h> |
| 25 | 25 | ||
| 26 | /* | 26 | /* |
| 27 | * Common place to define all x86 IRQ vectors | ||
| 28 | * | ||
| 29 | * This builds up the IRQ handler stubs using some ugly macros in irq.h | ||
| 30 | * | ||
| 31 | * These macros create the low-level assembly IRQ routines that save | ||
| 32 | * register context and call do_IRQ(). do_IRQ() then does all the | ||
| 33 | * operations that are needed to keep the AT (or SMP IOAPIC) | ||
| 34 | * interrupt-controller happy. | ||
| 35 | */ | ||
| 36 | |||
| 37 | #define IRQ_NAME2(nr) nr##_interrupt(void) | ||
| 38 | #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) | ||
| 39 | |||
| 40 | /* | ||
| 41 | * SMP has a few special interrupts for IPI messages | ||
| 42 | */ | ||
| 43 | |||
| 44 | #define BUILD_IRQ(nr) \ | ||
| 45 | asmlinkage void IRQ_NAME(nr); \ | ||
| 46 | asm("\n.text\n.p2align\n" \ | ||
| 47 | "IRQ" #nr "_interrupt:\n\t" \ | ||
| 48 | "push $~(" #nr ") ; " \ | ||
| 49 | "jmp common_interrupt\n" \ | ||
| 50 | ".previous"); | ||
| 51 | |||
| 52 | #define BI(x,y) \ | ||
| 53 | BUILD_IRQ(x##y) | ||
| 54 | |||
| 55 | #define BUILD_16_IRQS(x) \ | ||
| 56 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | ||
| 57 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | ||
| 58 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | ||
| 59 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | ||
| 60 | |||
| 61 | /* | ||
| 62 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | 27 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: |
| 63 | * (these are usually mapped to vectors 0x30-0x3f) | 28 | * (these are usually mapped to vectors 0x30-0x3f) |
| 64 | */ | 29 | */ |
| @@ -73,37 +38,6 @@ | |||
| 73 | * | 38 | * |
| 74 | * (these are usually mapped into the 0x30-0xff vector range) | 39 | * (these are usually mapped into the 0x30-0xff vector range) |
| 75 | */ | 40 | */ |
| 76 | BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) | ||
| 77 | BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) | ||
| 78 | BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | ||
| 79 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | ||
| 80 | |||
| 81 | #undef BUILD_16_IRQS | ||
| 82 | #undef BI | ||
| 83 | |||
| 84 | |||
| 85 | #define IRQ(x,y) \ | ||
| 86 | IRQ##x##y##_interrupt | ||
| 87 | |||
| 88 | #define IRQLIST_16(x) \ | ||
| 89 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | ||
| 90 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | ||
| 91 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | ||
| 92 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | ||
| 93 | |||
| 94 | /* for the irq vectors */ | ||
| 95 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | ||
| 96 | IRQLIST_16(0x2), IRQLIST_16(0x3), | ||
| 97 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | ||
| 98 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | ||
| 99 | IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) | ||
| 100 | }; | ||
| 101 | |||
| 102 | #undef IRQ | ||
| 103 | #undef IRQLIST_16 | ||
| 104 | |||
| 105 | |||
| 106 | |||
| 107 | 41 | ||
| 108 | /* | 42 | /* |
| 109 | * IRQ2 is cascade interrupt to second interrupt controller | 43 | * IRQ2 is cascade interrupt to second interrupt controller |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 7a385746509a..37f420018a41 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
| 14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
| 15 | #include <linux/suspend.h> | 15 | #include <linux/suspend.h> |
| 16 | #include <linux/gfp.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
| 18 | #include <asm/pgalloc.h> | 19 | #include <asm/pgalloc.h> |
| @@ -25,15 +26,6 @@ | |||
| 25 | #include <asm/system.h> | 26 | #include <asm/system.h> |
| 26 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
| 27 | 28 | ||
| 28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | ||
| 29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | ||
| 30 | #ifdef CONFIG_X86_PAE | ||
| 31 | static u32 kexec_pmd0[1024] PAGE_ALIGNED; | ||
| 32 | static u32 kexec_pmd1[1024] PAGE_ALIGNED; | ||
| 33 | #endif | ||
| 34 | static u32 kexec_pte0[1024] PAGE_ALIGNED; | ||
| 35 | static u32 kexec_pte1[1024] PAGE_ALIGNED; | ||
| 36 | |||
| 37 | static void set_idt(void *newidt, __u16 limit) | 29 | static void set_idt(void *newidt, __u16 limit) |
| 38 | { | 30 | { |
| 39 | struct desc_ptr curidt; | 31 | struct desc_ptr curidt; |
| @@ -76,6 +68,76 @@ static void load_segments(void) | |||
| 76 | #undef __STR | 68 | #undef __STR |
| 77 | } | 69 | } |
| 78 | 70 | ||
| 71 | static void machine_kexec_free_page_tables(struct kimage *image) | ||
| 72 | { | ||
| 73 | free_page((unsigned long)image->arch.pgd); | ||
| 74 | #ifdef CONFIG_X86_PAE | ||
| 75 | free_page((unsigned long)image->arch.pmd0); | ||
| 76 | free_page((unsigned long)image->arch.pmd1); | ||
| 77 | #endif | ||
| 78 | free_page((unsigned long)image->arch.pte0); | ||
| 79 | free_page((unsigned long)image->arch.pte1); | ||
| 80 | } | ||
| 81 | |||
| 82 | static int machine_kexec_alloc_page_tables(struct kimage *image) | ||
| 83 | { | ||
| 84 | image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); | ||
| 85 | #ifdef CONFIG_X86_PAE | ||
| 86 | image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
| 87 | image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
| 88 | #endif | ||
| 89 | image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
| 90 | image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
| 91 | if (!image->arch.pgd || | ||
| 92 | #ifdef CONFIG_X86_PAE | ||
| 93 | !image->arch.pmd0 || !image->arch.pmd1 || | ||
| 94 | #endif | ||
| 95 | !image->arch.pte0 || !image->arch.pte1) { | ||
| 96 | machine_kexec_free_page_tables(image); | ||
| 97 | return -ENOMEM; | ||
| 98 | } | ||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | static void machine_kexec_page_table_set_one( | ||
| 103 | pgd_t *pgd, pmd_t *pmd, pte_t *pte, | ||
| 104 | unsigned long vaddr, unsigned long paddr) | ||
| 105 | { | ||
| 106 | pud_t *pud; | ||
| 107 | |||
| 108 | pgd += pgd_index(vaddr); | ||
| 109 | #ifdef CONFIG_X86_PAE | ||
| 110 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) | ||
| 111 | set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT)); | ||
| 112 | #endif | ||
| 113 | pud = pud_offset(pgd, vaddr); | ||
| 114 | pmd = pmd_offset(pud, vaddr); | ||
| 115 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) | ||
| 116 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | ||
| 117 | pte = pte_offset_kernel(pmd, vaddr); | ||
| 118 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | ||
| 119 | } | ||
| 120 | |||
| 121 | static void machine_kexec_prepare_page_tables(struct kimage *image) | ||
| 122 | { | ||
| 123 | void *control_page; | ||
| 124 | pmd_t *pmd = 0; | ||
| 125 | |||
| 126 | control_page = page_address(image->control_code_page); | ||
| 127 | #ifdef CONFIG_X86_PAE | ||
| 128 | pmd = image->arch.pmd0; | ||
| 129 | #endif | ||
| 130 | machine_kexec_page_table_set_one( | ||
| 131 | image->arch.pgd, pmd, image->arch.pte0, | ||
| 132 | (unsigned long)control_page, __pa(control_page)); | ||
| 133 | #ifdef CONFIG_X86_PAE | ||
| 134 | pmd = image->arch.pmd1; | ||
| 135 | #endif | ||
| 136 | machine_kexec_page_table_set_one( | ||
| 137 | image->arch.pgd, pmd, image->arch.pte1, | ||
| 138 | __pa(control_page), __pa(control_page)); | ||
| 139 | } | ||
| 140 | |||
| 79 | /* | 141 | /* |
| 80 | * A architecture hook called to validate the | 142 | * A architecture hook called to validate the |
| 81 | * proposed image and prepare the control pages | 143 | * proposed image and prepare the control pages |
| @@ -87,12 +149,20 @@ static void load_segments(void) | |||
| 87 | * reboot code buffer to allow us to avoid allocations | 149 | * reboot code buffer to allow us to avoid allocations |
| 88 | * later. | 150 | * later. |
| 89 | * | 151 | * |
| 90 | * Make control page executable. | 152 | * - Make control page executable. |
| 153 | * - Allocate page tables | ||
| 154 | * - Setup page tables | ||
| 91 | */ | 155 | */ |
| 92 | int machine_kexec_prepare(struct kimage *image) | 156 | int machine_kexec_prepare(struct kimage *image) |
| 93 | { | 157 | { |
| 158 | int error; | ||
| 159 | |||
| 94 | if (nx_enabled) | 160 | if (nx_enabled) |
| 95 | set_pages_x(image->control_code_page, 1); | 161 | set_pages_x(image->control_code_page, 1); |
| 162 | error = machine_kexec_alloc_page_tables(image); | ||
| 163 | if (error) | ||
| 164 | return error; | ||
| 165 | machine_kexec_prepare_page_tables(image); | ||
| 96 | return 0; | 166 | return 0; |
| 97 | } | 167 | } |
| 98 | 168 | ||
| @@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
| 104 | { | 174 | { |
| 105 | if (nx_enabled) | 175 | if (nx_enabled) |
| 106 | set_pages_nx(image->control_code_page, 1); | 176 | set_pages_nx(image->control_code_page, 1); |
| 177 | machine_kexec_free_page_tables(image); | ||
| 107 | } | 178 | } |
| 108 | 179 | ||
| 109 | /* | 180 | /* |
| @@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image) | |||
| 150 | relocate_kernel_ptr = control_page; | 221 | relocate_kernel_ptr = control_page; |
| 151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 222 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
| 152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; | 223 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
| 153 | page_list[PA_PGD] = __pa(kexec_pgd); | 224 | page_list[PA_PGD] = __pa(image->arch.pgd); |
| 154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | ||
| 155 | #ifdef CONFIG_X86_PAE | ||
| 156 | page_list[PA_PMD_0] = __pa(kexec_pmd0); | ||
| 157 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | ||
| 158 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | ||
| 159 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
| 160 | #endif | ||
| 161 | page_list[PA_PTE_0] = __pa(kexec_pte0); | ||
| 162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | ||
| 163 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
| 164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
| 165 | 225 | ||
| 166 | if (image->type == KEXEC_TYPE_DEFAULT) | 226 | if (image->type == KEXEC_TYPE_DEFAULT) |
| 167 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | 227 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 5f8e5d75a254..c25fdb382292 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | * This driver allows to upgrade microcode on AMD | 10 | * This driver allows to upgrade microcode on AMD |
| 11 | * family 0x10 and 0x11 processors. | 11 | * family 0x10 and 0x11 processors. |
| 12 | * | 12 | * |
| 13 | * Licensed unter the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
| 14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
| 15 | */ | 15 | */ |
| 16 | 16 | ||
| @@ -32,9 +32,9 @@ | |||
| 32 | #include <linux/platform_device.h> | 32 | #include <linux/platform_device.h> |
| 33 | #include <linux/pci.h> | 33 | #include <linux/pci.h> |
| 34 | #include <linux/pci_ids.h> | 34 | #include <linux/pci_ids.h> |
| 35 | #include <linux/uaccess.h> | ||
| 35 | 36 | ||
| 36 | #include <asm/msr.h> | 37 | #include <asm/msr.h> |
| 37 | #include <asm/uaccess.h> | ||
| 38 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
| 39 | #include <asm/microcode.h> | 39 | #include <asm/microcode.h> |
| 40 | 40 | ||
| @@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2"); | |||
| 47 | #define UCODE_UCODE_TYPE 0x00000001 | 47 | #define UCODE_UCODE_TYPE 0x00000001 |
| 48 | 48 | ||
| 49 | struct equiv_cpu_entry { | 49 | struct equiv_cpu_entry { |
| 50 | unsigned int installed_cpu; | 50 | u32 installed_cpu; |
| 51 | unsigned int fixed_errata_mask; | 51 | u32 fixed_errata_mask; |
| 52 | unsigned int fixed_errata_compare; | 52 | u32 fixed_errata_compare; |
| 53 | unsigned int equiv_cpu; | 53 | u16 equiv_cpu; |
| 54 | }; | 54 | u16 res; |
| 55 | } __attribute__((packed)); | ||
| 55 | 56 | ||
| 56 | struct microcode_header_amd { | 57 | struct microcode_header_amd { |
| 57 | unsigned int data_code; | 58 | u32 data_code; |
| 58 | unsigned int patch_id; | 59 | u32 patch_id; |
| 59 | unsigned char mc_patch_data_id[2]; | 60 | u16 mc_patch_data_id; |
| 60 | unsigned char mc_patch_data_len; | 61 | u8 mc_patch_data_len; |
| 61 | unsigned char init_flag; | 62 | u8 init_flag; |
| 62 | unsigned int mc_patch_data_checksum; | 63 | u32 mc_patch_data_checksum; |
| 63 | unsigned int nb_dev_id; | 64 | u32 nb_dev_id; |
| 64 | unsigned int sb_dev_id; | 65 | u32 sb_dev_id; |
| 65 | unsigned char processor_rev_id[2]; | 66 | u16 processor_rev_id; |
| 66 | unsigned char nb_rev_id; | 67 | u8 nb_rev_id; |
| 67 | unsigned char sb_rev_id; | 68 | u8 sb_rev_id; |
| 68 | unsigned char bios_api_rev; | 69 | u8 bios_api_rev; |
| 69 | unsigned char reserved1[3]; | 70 | u8 reserved1[3]; |
| 70 | unsigned int match_reg[8]; | 71 | u32 match_reg[8]; |
| 71 | }; | 72 | } __attribute__((packed)); |
| 72 | 73 | ||
| 73 | struct microcode_amd { | 74 | struct microcode_amd { |
| 74 | struct microcode_header_amd hdr; | 75 | struct microcode_header_amd hdr; |
| 75 | unsigned int mpb[0]; | 76 | unsigned int mpb[0]; |
| 76 | }; | 77 | }; |
| 77 | 78 | ||
| 78 | #define UCODE_MAX_SIZE (2048) | 79 | #define UCODE_MAX_SIZE 2048 |
| 79 | #define DEFAULT_UCODE_DATASIZE (896) | 80 | #define UCODE_CONTAINER_SECTION_HDR 8 |
| 80 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) | 81 | #define UCODE_CONTAINER_HEADER_SIZE 12 |
| 81 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
| 82 | #define DWSIZE (sizeof(u32)) | ||
| 83 | /* For now we support a fixed ucode total size only */ | ||
| 84 | #define get_totalsize(mc) \ | ||
| 85 | ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ | ||
| 86 | + MC_HEADER_SIZE) | ||
| 87 | 82 | ||
| 88 | /* serialize access to the physical write */ | 83 | /* serialize access to the physical write */ |
| 89 | static DEFINE_SPINLOCK(microcode_update_lock); | 84 | static DEFINE_SPINLOCK(microcode_update_lock); |
| @@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
| 93 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 88 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
| 94 | { | 89 | { |
| 95 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 90 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 91 | u32 dummy; | ||
| 96 | 92 | ||
| 97 | memset(csig, 0, sizeof(*csig)); | 93 | memset(csig, 0, sizeof(*csig)); |
| 98 | |||
| 99 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 94 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
| 100 | printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", | 95 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " |
| 101 | cpu); | 96 | "supported\n", cpu, c->x86); |
| 102 | return -1; | 97 | return -1; |
| 103 | } | 98 | } |
| 104 | 99 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | |
| 105 | asm volatile("movl %1, %%ecx; rdmsr" | 100 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
| 106 | : "=a" (csig->rev) | ||
| 107 | : "i" (0x0000008B) : "ecx"); | ||
| 108 | |||
| 109 | printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", | ||
| 110 | csig->rev); | ||
| 111 | |||
| 112 | return 0; | 101 | return 0; |
| 113 | } | 102 | } |
| 114 | 103 | ||
| 115 | static int get_matching_microcode(int cpu, void *mc, int rev) | 104 | static int get_matching_microcode(int cpu, void *mc, int rev) |
| 116 | { | 105 | { |
| 117 | struct microcode_header_amd *mc_header = mc; | 106 | struct microcode_header_amd *mc_header = mc; |
| 118 | struct pci_dev *nb_pci_dev, *sb_pci_dev; | ||
| 119 | unsigned int current_cpu_id; | 107 | unsigned int current_cpu_id; |
| 120 | unsigned int equiv_cpu_id = 0x00; | 108 | u16 equiv_cpu_id = 0; |
| 121 | unsigned int i = 0; | 109 | unsigned int i = 0; |
| 122 | 110 | ||
| 123 | BUG_ON(equiv_cpu_table == NULL); | 111 | BUG_ON(equiv_cpu_table == NULL); |
| @@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
| 132 | } | 120 | } |
| 133 | 121 | ||
| 134 | if (!equiv_cpu_id) { | 122 | if (!equiv_cpu_id) { |
| 135 | printk(KERN_ERR "microcode: CPU%d cpu_id " | 123 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " |
| 136 | "not found in equivalent cpu table \n", cpu); | 124 | "not listed in equivalent cpu table\n", cpu); |
| 137 | return 0; | 125 | return 0; |
| 138 | } | 126 | } |
| 139 | 127 | ||
| 140 | if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { | 128 | if (mc_header->processor_rev_id != equiv_cpu_id) { |
| 141 | printk(KERN_ERR | 129 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " |
| 142 | "microcode: CPU%d patch does not match " | 130 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", |
| 143 | "(patch is %x, cpu extended is %x) \n", | 131 | cpu, mc_header->processor_rev_id, equiv_cpu_id); |
| 144 | cpu, mc_header->processor_rev_id[0], | ||
| 145 | (equiv_cpu_id & 0xff)); | ||
| 146 | return 0; | 132 | return 0; |
| 147 | } | 133 | } |
| 148 | 134 | ||
| 149 | if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { | 135 | /* ucode might be chipset specific -- currently we don't support this */ |
| 150 | printk(KERN_ERR "microcode: CPU%d patch does not match " | 136 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
| 151 | "(patch is %x, cpu base id is %x) \n", | 137 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " |
| 152 | cpu, mc_header->processor_rev_id[1], | 138 | "specific code not yet supported\n", cpu); |
| 153 | ((equiv_cpu_id >> 16) & 0xff)); | ||
| 154 | |||
| 155 | return 0; | 139 | return 0; |
| 156 | } | 140 | } |
| 157 | 141 | ||
| 158 | /* ucode may be northbridge specific */ | ||
| 159 | if (mc_header->nb_dev_id) { | ||
| 160 | nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
| 161 | (mc_header->nb_dev_id & 0xff), | ||
| 162 | NULL); | ||
| 163 | if ((!nb_pci_dev) || | ||
| 164 | (mc_header->nb_rev_id != nb_pci_dev->revision)) { | ||
| 165 | printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); | ||
| 166 | pci_dev_put(nb_pci_dev); | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | pci_dev_put(nb_pci_dev); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* ucode may be southbridge specific */ | ||
| 173 | if (mc_header->sb_dev_id) { | ||
| 174 | sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
| 175 | (mc_header->sb_dev_id & 0xff), | ||
| 176 | NULL); | ||
| 177 | if ((!sb_pci_dev) || | ||
| 178 | (mc_header->sb_rev_id != sb_pci_dev->revision)) { | ||
| 179 | printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); | ||
| 180 | pci_dev_put(sb_pci_dev); | ||
| 181 | return 0; | ||
| 182 | } | ||
| 183 | pci_dev_put(sb_pci_dev); | ||
| 184 | } | ||
| 185 | |||
| 186 | if (mc_header->patch_id <= rev) | 142 | if (mc_header->patch_id <= rev) |
| 187 | return 0; | 143 | return 0; |
| 188 | 144 | ||
| @@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
| 192 | static void apply_microcode_amd(int cpu) | 148 | static void apply_microcode_amd(int cpu) |
| 193 | { | 149 | { |
| 194 | unsigned long flags; | 150 | unsigned long flags; |
| 195 | unsigned int eax, edx; | 151 | u32 rev, dummy; |
| 196 | unsigned int rev; | ||
| 197 | int cpu_num = raw_smp_processor_id(); | 152 | int cpu_num = raw_smp_processor_id(); |
| 198 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | 153 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; |
| 199 | struct microcode_amd *mc_amd = uci->mc; | 154 | struct microcode_amd *mc_amd = uci->mc; |
| 200 | unsigned long addr; | ||
| 201 | 155 | ||
| 202 | /* We should bind the task to the CPU */ | 156 | /* We should bind the task to the CPU */ |
| 203 | BUG_ON(cpu_num != cpu); | 157 | BUG_ON(cpu_num != cpu); |
| @@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu) | |||
| 206 | return; | 160 | return; |
| 207 | 161 | ||
| 208 | spin_lock_irqsave(&microcode_update_lock, flags); | 162 | spin_lock_irqsave(&microcode_update_lock, flags); |
| 209 | 163 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | |
| 210 | addr = (unsigned long)&mc_amd->hdr.data_code; | ||
| 211 | edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); | ||
| 212 | eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); | ||
| 213 | |||
| 214 | asm volatile("movl %0, %%ecx; wrmsr" : | ||
| 215 | : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); | ||
| 216 | |||
| 217 | /* get patch id after patching */ | 164 | /* get patch id after patching */ |
| 218 | asm volatile("movl %1, %%ecx; rdmsr" | 165 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
| 219 | : "=a" (rev) | ||
| 220 | : "i" (0x0000008B) : "ecx"); | ||
| 221 | |||
| 222 | spin_unlock_irqrestore(&microcode_update_lock, flags); | 166 | spin_unlock_irqrestore(&microcode_update_lock, flags); |
| 223 | 167 | ||
| 224 | /* check current patch id and patch's id for match */ | 168 | /* check current patch id and patch's id for match */ |
| 225 | if (rev != mc_amd->hdr.patch_id) { | 169 | if (rev != mc_amd->hdr.patch_id) { |
| 226 | printk(KERN_ERR "microcode: CPU%d update from revision " | 170 | printk(KERN_ERR "microcode: CPU%d: update failed " |
| 227 | "0x%x to 0x%x failed\n", cpu_num, | 171 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); |
| 228 | mc_amd->hdr.patch_id, rev); | ||
| 229 | return; | 172 | return; |
| 230 | } | 173 | } |
| 231 | 174 | ||
| 232 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 175 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", |
| 233 | "0x%x to 0x%x \n", | 176 | cpu, rev); |
| 234 | cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); | ||
| 235 | 177 | ||
| 236 | uci->cpu_sig.rev = rev; | 178 | uci->cpu_sig.rev = rev; |
| 237 | } | 179 | } |
| 238 | 180 | ||
| 239 | static void * get_next_ucode(u8 *buf, unsigned int size, | 181 | static int get_ucode_data(void *to, const u8 *from, size_t n) |
| 240 | int (*get_ucode_data)(void *, const void *, size_t), | 182 | { |
| 241 | unsigned int *mc_size) | 183 | memcpy(to, from, n); |
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
| 187 | static void *get_next_ucode(const u8 *buf, unsigned int size, | ||
| 188 | unsigned int *mc_size) | ||
| 242 | { | 189 | { |
| 243 | unsigned int total_size; | 190 | unsigned int total_size; |
| 244 | #define UCODE_CONTAINER_SECTION_HDR 8 | ||
| 245 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 191 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
| 246 | void *mc; | 192 | void *mc; |
| 247 | 193 | ||
| @@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size, | |||
| 249 | return NULL; | 195 | return NULL; |
| 250 | 196 | ||
| 251 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 197 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
| 252 | printk(KERN_ERR "microcode: error! " | 198 | printk(KERN_ERR "microcode: error: invalid type field in " |
| 253 | "Wrong microcode payload type field\n"); | 199 | "container file section header\n"); |
| 254 | return NULL; | 200 | return NULL; |
| 255 | } | 201 | } |
| 256 | 202 | ||
| 257 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 203 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
| 258 | 204 | ||
| 259 | printk(KERN_INFO "microcode: size %u, total_size %u\n", | 205 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", |
| 260 | size, total_size); | 206 | size, total_size); |
| 261 | 207 | ||
| 262 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 208 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
| 263 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | 209 | printk(KERN_ERR "microcode: error: size mismatch\n"); |
| 264 | return NULL; | 210 | return NULL; |
| 265 | } | 211 | } |
| 266 | 212 | ||
| 267 | mc = vmalloc(UCODE_MAX_SIZE); | 213 | mc = vmalloc(UCODE_MAX_SIZE); |
| 268 | if (mc) { | 214 | if (mc) { |
| 269 | memset(mc, 0, UCODE_MAX_SIZE); | 215 | memset(mc, 0, UCODE_MAX_SIZE); |
| 270 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { | 216 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, |
| 217 | total_size)) { | ||
| 271 | vfree(mc); | 218 | vfree(mc); |
| 272 | mc = NULL; | 219 | mc = NULL; |
| 273 | } else | 220 | } else |
| 274 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | 221 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; |
| 275 | } | 222 | } |
| 276 | #undef UCODE_CONTAINER_SECTION_HDR | ||
| 277 | return mc; | 223 | return mc; |
| 278 | } | 224 | } |
| 279 | 225 | ||
| 280 | 226 | ||
| 281 | static int install_equiv_cpu_table(u8 *buf, | 227 | static int install_equiv_cpu_table(const u8 *buf) |
| 282 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
| 283 | { | 228 | { |
| 284 | #define UCODE_CONTAINER_HEADER_SIZE 12 | ||
| 285 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | 229 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; |
| 286 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 230 | unsigned int *buf_pos = (unsigned int *)container_hdr; |
| 287 | unsigned long size; | 231 | unsigned long size; |
| @@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf, | |||
| 292 | size = buf_pos[2]; | 236 | size = buf_pos[2]; |
| 293 | 237 | ||
| 294 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 238 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
| 295 | printk(KERN_ERR "microcode: error! " | 239 | printk(KERN_ERR "microcode: error: invalid type field in " |
| 296 | "Wrong microcode equivalnet cpu table\n"); | 240 | "container file section header\n"); |
| 297 | return 0; | 241 | return 0; |
| 298 | } | 242 | } |
| 299 | 243 | ||
| 300 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 244 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
| 301 | if (!equiv_cpu_table) { | 245 | if (!equiv_cpu_table) { |
| 302 | printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); | 246 | printk(KERN_ERR "microcode: failed to allocate " |
| 247 | "equivalent CPU table\n"); | ||
| 303 | return 0; | 248 | return 0; |
| 304 | } | 249 | } |
| 305 | 250 | ||
| @@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf, | |||
| 310 | } | 255 | } |
| 311 | 256 | ||
| 312 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 257 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
| 313 | #undef UCODE_CONTAINER_HEADER_SIZE | ||
| 314 | } | 258 | } |
| 315 | 259 | ||
| 316 | static void free_equiv_cpu_table(void) | 260 | static void free_equiv_cpu_table(void) |
| @@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void) | |||
| 321 | } | 265 | } |
| 322 | } | 266 | } |
| 323 | 267 | ||
| 324 | static int generic_load_microcode(int cpu, void *data, size_t size, | 268 | static int generic_load_microcode(int cpu, const u8 *data, size_t size) |
| 325 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
| 326 | { | 269 | { |
| 327 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 270 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
| 328 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 271 | const u8 *ucode_ptr = data; |
| 272 | void *new_mc = NULL; | ||
| 273 | void *mc; | ||
| 329 | int new_rev = uci->cpu_sig.rev; | 274 | int new_rev = uci->cpu_sig.rev; |
| 330 | unsigned int leftover; | 275 | unsigned int leftover; |
| 331 | unsigned long offset; | 276 | unsigned long offset; |
| 332 | 277 | ||
| 333 | offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); | 278 | offset = install_equiv_cpu_table(ucode_ptr); |
| 334 | if (!offset) { | 279 | if (!offset) { |
| 335 | printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); | 280 | printk(KERN_ERR "microcode: failed to create " |
| 281 | "equivalent cpu table\n"); | ||
| 336 | return -EINVAL; | 282 | return -EINVAL; |
| 337 | } | 283 | } |
| 338 | 284 | ||
| @@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
| 343 | unsigned int uninitialized_var(mc_size); | 289 | unsigned int uninitialized_var(mc_size); |
| 344 | struct microcode_header_amd *mc_header; | 290 | struct microcode_header_amd *mc_header; |
| 345 | 291 | ||
| 346 | mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); | 292 | mc = get_next_ucode(ucode_ptr, leftover, &mc_size); |
| 347 | if (!mc) | 293 | if (!mc) |
| 348 | break; | 294 | break; |
| 349 | 295 | ||
| @@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
| 353 | vfree(new_mc); | 299 | vfree(new_mc); |
| 354 | new_rev = mc_header->patch_id; | 300 | new_rev = mc_header->patch_id; |
| 355 | new_mc = mc; | 301 | new_mc = mc; |
| 356 | } else | 302 | } else |
| 357 | vfree(mc); | 303 | vfree(mc); |
| 358 | 304 | ||
| 359 | ucode_ptr += mc_size; | 305 | ucode_ptr += mc_size; |
| @@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
| 365 | if (uci->mc) | 311 | if (uci->mc) |
| 366 | vfree(uci->mc); | 312 | vfree(uci->mc); |
| 367 | uci->mc = new_mc; | 313 | uci->mc = new_mc; |
| 368 | pr_debug("microcode: CPU%d found a matching microcode update with" | 314 | pr_debug("microcode: CPU%d found a matching microcode " |
| 369 | " version 0x%x (current=0x%x)\n", | 315 | "update with version 0x%x (current=0x%x)\n", |
| 370 | cpu, new_rev, uci->cpu_sig.rev); | 316 | cpu, new_rev, uci->cpu_sig.rev); |
| 371 | } else | 317 | } else |
| 372 | vfree(new_mc); | 318 | vfree(new_mc); |
| 373 | } | 319 | } |
| @@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
| 377 | return (int)leftover; | 323 | return (int)leftover; |
| 378 | } | 324 | } |
| 379 | 325 | ||
| 380 | static int get_ucode_fw(void *to, const void *from, size_t n) | ||
| 381 | { | ||
| 382 | memcpy(to, from, n); | ||
| 383 | return 0; | ||
| 384 | } | ||
| 385 | |||
| 386 | static int request_microcode_fw(int cpu, struct device *device) | 326 | static int request_microcode_fw(int cpu, struct device *device) |
| 387 | { | 327 | { |
| 388 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 328 | const char *fw_name = "amd-ucode/microcode_amd.bin"; |
| @@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
| 394 | 334 | ||
| 395 | ret = request_firmware(&firmware, fw_name, device); | 335 | ret = request_firmware(&firmware, fw_name, device); |
| 396 | if (ret) { | 336 | if (ret) { |
| 397 | printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); | 337 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); |
| 398 | return ret; | 338 | return ret; |
| 399 | } | 339 | } |
| 400 | 340 | ||
| 401 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | 341 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
| 402 | &get_ucode_fw); | ||
| 403 | 342 | ||
| 404 | release_firmware(firmware); | 343 | release_firmware(firmware); |
| 405 | 344 | ||
| @@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
| 408 | 347 | ||
| 409 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | 348 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) |
| 410 | { | 349 | { |
| 411 | printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" | 350 | printk(KERN_INFO "microcode: AMD microcode update via " |
| 412 | "is not supported\n"); | 351 | "/dev/cpu/microcode not supported\n"); |
| 413 | return -1; | 352 | return -1; |
| 414 | } | 353 | } |
| 415 | 354 | ||
| @@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
| 433 | { | 372 | { |
| 434 | return &microcode_amd_ops; | 373 | return &microcode_amd_ops; |
| 435 | } | 374 | } |
| 375 | |||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index c4b5b24e0217..c9b721ba968c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -99,7 +99,7 @@ MODULE_LICENSE("GPL"); | |||
| 99 | 99 | ||
| 100 | #define MICROCODE_VERSION "2.00" | 100 | #define MICROCODE_VERSION "2.00" |
| 101 | 101 | ||
| 102 | struct microcode_ops *microcode_ops; | 102 | static struct microcode_ops *microcode_ops; |
| 103 | 103 | ||
| 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ |
| 105 | static DEFINE_MUTEX(microcode_mutex); | 105 | static DEFINE_MUTEX(microcode_mutex); |
| @@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | |||
| 203 | #endif | 203 | #endif |
| 204 | 204 | ||
| 205 | /* fake device for request_firmware */ | 205 | /* fake device for request_firmware */ |
| 206 | struct platform_device *microcode_pdev; | 206 | static struct platform_device *microcode_pdev; |
| 207 | 207 | ||
| 208 | static ssize_t reload_store(struct sys_device *dev, | 208 | static ssize_t reload_store(struct sys_device *dev, |
| 209 | struct sysdev_attribute *attr, | 209 | struct sysdev_attribute *attr, |
| @@ -328,7 +328,7 @@ static int microcode_resume_cpu(int cpu) | |||
| 328 | return 0; | 328 | return 0; |
| 329 | } | 329 | } |
| 330 | 330 | ||
| 331 | void microcode_update_cpu(int cpu) | 331 | static void microcode_update_cpu(int cpu) |
| 332 | { | 332 | { |
| 333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
| 334 | int err = 0; | 334 | int err = 0; |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index a8e62792d171..b7f4c929e615 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
| @@ -471,7 +471,7 @@ static void microcode_fini_cpu(int cpu) | |||
| 471 | uci->mc = NULL; | 471 | uci->mc = NULL; |
| 472 | } | 472 | } |
| 473 | 473 | ||
| 474 | struct microcode_ops microcode_intel_ops = { | 474 | static struct microcode_ops microcode_intel_ops = { |
| 475 | .request_microcode_user = request_microcode_user, | 475 | .request_microcode_user = request_microcode_user, |
| 476 | .request_microcode_fw = request_microcode_fw, | 476 | .request_microcode_fw = request_microcode_fw, |
| 477 | .collect_cpu_info = collect_cpu_info, | 477 | .collect_cpu_info = collect_cpu_info, |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0f4c1fd5a1f4..45e3b69808ba 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early) | |||
| 586 | { | 586 | { |
| 587 | struct intel_mp_floating *mpf = mpf_found; | 587 | struct intel_mp_floating *mpf = mpf_found; |
| 588 | 588 | ||
| 589 | if (x86_quirks->mach_get_smp_config) { | 589 | if (!mpf) |
| 590 | if (x86_quirks->mach_get_smp_config(early)) | 590 | return; |
| 591 | return; | 591 | |
| 592 | } | ||
| 593 | if (acpi_lapic && early) | 592 | if (acpi_lapic && early) |
| 594 | return; | 593 | return; |
| 594 | |||
| 595 | /* | 595 | /* |
| 596 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | 596 | * MPS doesn't support hyperthreading, aka only have |
| 597 | * processors, where MPS only supports physical. | 597 | * thread 0 apic id in MPS table |
| 598 | */ | 598 | */ |
| 599 | if (acpi_lapic && acpi_ioapic) { | 599 | if (acpi_lapic && acpi_ioapic) |
| 600 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
| 601 | "information\n"); | ||
| 602 | return; | 600 | return; |
| 603 | } else if (acpi_lapic) | ||
| 604 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | ||
| 605 | "configuration information\n"); | ||
| 606 | 601 | ||
| 607 | if (!mpf) | 602 | if (x86_quirks->mach_get_smp_config) { |
| 608 | return; | 603 | if (x86_quirks->mach_get_smp_config(early)) |
| 604 | return; | ||
| 605 | } | ||
| 609 | 606 | ||
| 610 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 607 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
| 611 | mpf->mpf_specification); | 608 | mpf->mpf_specification); |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2c97f07f1c2c..8bd1bf9622a7 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
| @@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) | |||
| 131 | atomic_dec(&nmi_active); | 131 | atomic_dec(&nmi_active); |
| 132 | } | 132 | } |
| 133 | 133 | ||
| 134 | static void __acpi_nmi_disable(void *__unused) | ||
| 135 | { | ||
| 136 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
| 137 | } | ||
| 138 | |||
| 134 | int __init check_nmi_watchdog(void) | 139 | int __init check_nmi_watchdog(void) |
| 135 | { | 140 | { |
| 136 | unsigned int *prev_nmi_count; | 141 | unsigned int *prev_nmi_count; |
| @@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void) | |||
| 179 | kfree(prev_nmi_count); | 184 | kfree(prev_nmi_count); |
| 180 | return 0; | 185 | return 0; |
| 181 | error: | 186 | error: |
| 182 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | 187 | if (nmi_watchdog == NMI_IO_APIC) { |
| 183 | disable_8259A_irq(0); | 188 | if (!timer_through_8259) |
| 189 | disable_8259A_irq(0); | ||
| 190 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
| 191 | } | ||
| 192 | |||
| 184 | #ifdef CONFIG_X86_32 | 193 | #ifdef CONFIG_X86_32 |
| 185 | timer_ack = 0; | 194 | timer_ack = 0; |
| 186 | #endif | 195 | #endif |
| @@ -199,12 +208,17 @@ static int __init setup_nmi_watchdog(char *str) | |||
| 199 | ++str; | 208 | ++str; |
| 200 | } | 209 | } |
| 201 | 210 | ||
| 202 | get_option(&str, &nmi); | 211 | if (!strncmp(str, "lapic", 5)) |
| 203 | 212 | nmi_watchdog = NMI_LOCAL_APIC; | |
| 204 | if (nmi >= NMI_INVALID) | 213 | else if (!strncmp(str, "ioapic", 6)) |
| 205 | return 0; | 214 | nmi_watchdog = NMI_IO_APIC; |
| 215 | else { | ||
| 216 | get_option(&str, &nmi); | ||
| 217 | if (nmi >= NMI_INVALID) | ||
| 218 | return 0; | ||
| 219 | nmi_watchdog = nmi; | ||
| 220 | } | ||
| 206 | 221 | ||
| 207 | nmi_watchdog = nmi; | ||
| 208 | return 1; | 222 | return 1; |
| 209 | } | 223 | } |
| 210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 224 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
| @@ -285,11 +299,6 @@ void acpi_nmi_enable(void) | |||
| 285 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | 299 | on_each_cpu(__acpi_nmi_enable, NULL, 1); |
| 286 | } | 300 | } |
| 287 | 301 | ||
| 288 | static void __acpi_nmi_disable(void *__unused) | ||
| 289 | { | ||
| 290 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
| 291 | } | ||
| 292 | |||
| 293 | /* | 302 | /* |
| 294 | * Disable timer based NMIs on all CPUs: | 303 | * Disable timer based NMIs on all CPUs: |
| 295 | */ | 304 | */ |
| @@ -340,6 +349,8 @@ void stop_apic_nmi_watchdog(void *unused) | |||
| 340 | return; | 349 | return; |
| 341 | if (nmi_watchdog == NMI_LOCAL_APIC) | 350 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 342 | lapic_watchdog_stop(); | 351 | lapic_watchdog_stop(); |
| 352 | else | ||
| 353 | __acpi_nmi_disable(NULL); | ||
| 343 | __get_cpu_var(wd_enabled) = 0; | 354 | __get_cpu_var(wd_enabled) = 0; |
| 344 | atomic_dec(&nmi_active); | 355 | atomic_dec(&nmi_active); |
| 345 | } | 356 | } |
| @@ -465,6 +476,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
| 465 | 476 | ||
| 466 | #ifdef CONFIG_SYSCTL | 477 | #ifdef CONFIG_SYSCTL |
| 467 | 478 | ||
| 479 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
| 480 | { | ||
| 481 | __get_cpu_var(wd_enabled) = 1; | ||
| 482 | atomic_inc(&nmi_active); | ||
| 483 | __acpi_nmi_enable(NULL); | ||
| 484 | } | ||
| 485 | |||
| 486 | static void enable_ioapic_nmi_watchdog(void) | ||
| 487 | { | ||
| 488 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
| 489 | touch_nmi_watchdog(); | ||
| 490 | } | ||
| 491 | |||
| 492 | static void disable_ioapic_nmi_watchdog(void) | ||
| 493 | { | ||
| 494 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
| 495 | } | ||
| 496 | |||
| 468 | static int __init setup_unknown_nmi_panic(char *str) | 497 | static int __init setup_unknown_nmi_panic(char *str) |
| 469 | { | 498 | { |
| 470 | unknown_nmi_panic = 1; | 499 | unknown_nmi_panic = 1; |
| @@ -507,6 +536,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
| 507 | enable_lapic_nmi_watchdog(); | 536 | enable_lapic_nmi_watchdog(); |
| 508 | else | 537 | else |
| 509 | disable_lapic_nmi_watchdog(); | 538 | disable_lapic_nmi_watchdog(); |
| 539 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
| 540 | if (nmi_watchdog_enabled) | ||
| 541 | enable_ioapic_nmi_watchdog(); | ||
| 542 | else | ||
| 543 | disable_ioapic_nmi_watchdog(); | ||
| 510 | } else { | 544 | } else { |
| 511 | printk(KERN_WARNING | 545 | printk(KERN_WARNING |
| 512 | "NMI watchdog doesn't know what hardware to touch\n"); | 546 | "NMI watchdog doesn't know what hardware to touch\n"); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 4caff39078e0..0deea37a53cf 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <asm/numaq.h> | 31 | #include <asm/numaq.h> |
| 32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
| 33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
| 34 | #include <asm/mpspec.h> | 34 | #include <asm/genapic.h> |
| 35 | #include <asm/e820.h> | 35 | #include <asm/e820.h> |
| 36 | #include <asm/setup.h> | 36 | #include <asm/setup.h> |
| 37 | 37 | ||
| @@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void) | |||
| 235 | return 1; | 235 | return 1; |
| 236 | } | 236 | } |
| 237 | 237 | ||
| 238 | static int __init numaq_update_genapic(void) | ||
| 239 | { | ||
| 240 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; | ||
| 241 | |||
| 242 | return 0; | ||
| 243 | } | ||
| 244 | |||
| 238 | static struct x86_quirks numaq_x86_quirks __initdata = { | 245 | static struct x86_quirks numaq_x86_quirks __initdata = { |
| 239 | .arch_pre_time_init = numaq_pre_time_init, | 246 | .arch_pre_time_init = numaq_pre_time_init, |
| 240 | .arch_time_init = NULL, | 247 | .arch_time_init = NULL, |
| @@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { | |||
| 250 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | 257 | .mpc_oem_pci_bus = mpc_oem_pci_bus, |
| 251 | .smp_read_mpc_oem = smp_read_mpc_oem, | 258 | .smp_read_mpc_oem = smp_read_mpc_oem, |
| 252 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | 259 | .setup_ioapic_ids = numaq_setup_ioapic_ids, |
| 260 | .update_genapic = numaq_update_genapic, | ||
| 253 | }; | 261 | }; |
| 254 | 262 | ||
| 255 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | 263 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 192624820217..7a3dfceb90e4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
| 7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
| 8 | #include <asm/iommu.h> | 8 | #include <asm/iommu.h> |
| 9 | #include <asm/gart.h> | ||
| 9 | #include <asm/calgary.h> | 10 | #include <asm/calgary.h> |
| 10 | #include <asm/amd_iommu.h> | 11 | #include <asm/amd_iommu.h> |
| 11 | 12 | ||
| @@ -30,11 +31,6 @@ int no_iommu __read_mostly; | |||
| 30 | /* Set this to 1 if there is a HW IOMMU in the system */ | 31 | /* Set this to 1 if there is a HW IOMMU in the system */ |
| 31 | int iommu_detected __read_mostly = 0; | 32 | int iommu_detected __read_mostly = 0; |
| 32 | 33 | ||
| 33 | /* This tells the BIO block layer to assume merging. Default to off | ||
| 34 | because we cannot guarantee merging later. */ | ||
| 35 | int iommu_bio_merge __read_mostly = 0; | ||
| 36 | EXPORT_SYMBOL(iommu_bio_merge); | ||
| 37 | |||
| 38 | dma_addr_t bad_dma_address __read_mostly = 0; | 34 | dma_addr_t bad_dma_address __read_mostly = 0; |
| 39 | EXPORT_SYMBOL(bad_dma_address); | 35 | EXPORT_SYMBOL(bad_dma_address); |
| 40 | 36 | ||
| @@ -188,7 +184,6 @@ static __init int iommu_setup(char *p) | |||
| 188 | } | 184 | } |
| 189 | 185 | ||
| 190 | if (!strncmp(p, "biomerge", 8)) { | 186 | if (!strncmp(p, "biomerge", 8)) { |
| 191 | iommu_bio_merge = 4096; | ||
| 192 | iommu_merge = 1; | 187 | iommu_merge = 1; |
| 193 | force_iommu = 1; | 188 | force_iommu = 1; |
| 194 | } | 189 | } |
| @@ -300,8 +295,8 @@ fs_initcall(pci_iommu_init); | |||
| 300 | static __devinit void via_no_dac(struct pci_dev *dev) | 295 | static __devinit void via_no_dac(struct pci_dev *dev) |
| 301 | { | 296 | { |
| 302 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | 297 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { |
| 303 | printk(KERN_INFO "PCI: VIA PCI bridge detected." | 298 | printk(KERN_INFO |
| 304 | "Disabling DAC.\n"); | 299 | "PCI: VIA PCI bridge detected. Disabling DAC.\n"); |
| 305 | forbid_dac = 1; | 300 | forbid_dac = 1; |
| 306 | } | 301 | } |
| 307 | } | 302 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d8..e68bb9e30864 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -1,13 +1,16 @@ | |||
| 1 | #include <linux/errno.h> | 1 | #include <linux/errno.h> |
| 2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
| 3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
| 4 | #include <asm/idle.h> | ||
| 4 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
| 5 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
| 6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 7 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 8 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
| 9 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
| 11 | #include <linux/ftrace.h> | ||
| 10 | #include <asm/system.h> | 12 | #include <asm/system.h> |
| 13 | #include <asm/apic.h> | ||
| 11 | 14 | ||
| 12 | unsigned long idle_halt; | 15 | unsigned long idle_halt; |
| 13 | EXPORT_SYMBOL(idle_halt); | 16 | EXPORT_SYMBOL(idle_halt); |
| @@ -100,6 +103,9 @@ static inline int hlt_use_halt(void) | |||
| 100 | void default_idle(void) | 103 | void default_idle(void) |
| 101 | { | 104 | { |
| 102 | if (hlt_use_halt()) { | 105 | if (hlt_use_halt()) { |
| 106 | struct power_trace it; | ||
| 107 | |||
| 108 | trace_power_start(&it, POWER_CSTATE, 1); | ||
| 103 | current_thread_info()->status &= ~TS_POLLING; | 109 | current_thread_info()->status &= ~TS_POLLING; |
| 104 | /* | 110 | /* |
| 105 | * TS_POLLING-cleared state must be visible before we | 111 | * TS_POLLING-cleared state must be visible before we |
| @@ -112,6 +118,7 @@ void default_idle(void) | |||
| 112 | else | 118 | else |
| 113 | local_irq_enable(); | 119 | local_irq_enable(); |
| 114 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
| 121 | trace_power_end(&it); | ||
| 115 | } else { | 122 | } else { |
| 116 | local_irq_enable(); | 123 | local_irq_enable(); |
| 117 | /* loop is done by the caller */ | 124 | /* loop is done by the caller */ |
| @@ -122,6 +129,21 @@ void default_idle(void) | |||
| 122 | EXPORT_SYMBOL(default_idle); | 129 | EXPORT_SYMBOL(default_idle); |
| 123 | #endif | 130 | #endif |
| 124 | 131 | ||
| 132 | void stop_this_cpu(void *dummy) | ||
| 133 | { | ||
| 134 | local_irq_disable(); | ||
| 135 | /* | ||
| 136 | * Remove this CPU: | ||
| 137 | */ | ||
| 138 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
| 139 | disable_local_APIC(); | ||
| 140 | |||
| 141 | for (;;) { | ||
| 142 | if (hlt_works(smp_processor_id())) | ||
| 143 | halt(); | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 125 | static void do_nothing(void *unused) | 147 | static void do_nothing(void *unused) |
| 126 | { | 148 | { |
| 127 | } | 149 | } |
| @@ -154,24 +176,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
| 154 | */ | 176 | */ |
| 155 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 177 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
| 156 | { | 178 | { |
| 179 | struct power_trace it; | ||
| 180 | |||
| 181 | trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); | ||
| 157 | if (!need_resched()) { | 182 | if (!need_resched()) { |
| 158 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 183 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
| 159 | smp_mb(); | 184 | smp_mb(); |
| 160 | if (!need_resched()) | 185 | if (!need_resched()) |
| 161 | __mwait(ax, cx); | 186 | __mwait(ax, cx); |
| 162 | } | 187 | } |
| 188 | trace_power_end(&it); | ||
| 163 | } | 189 | } |
| 164 | 190 | ||
| 165 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | 191 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ |
| 166 | static void mwait_idle(void) | 192 | static void mwait_idle(void) |
| 167 | { | 193 | { |
| 194 | struct power_trace it; | ||
| 168 | if (!need_resched()) { | 195 | if (!need_resched()) { |
| 196 | trace_power_start(&it, POWER_CSTATE, 1); | ||
| 169 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 197 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
| 170 | smp_mb(); | 198 | smp_mb(); |
| 171 | if (!need_resched()) | 199 | if (!need_resched()) |
| 172 | __sti_mwait(0, 0); | 200 | __sti_mwait(0, 0); |
| 173 | else | 201 | else |
| 174 | local_irq_enable(); | 202 | local_irq_enable(); |
| 203 | trace_power_end(&it); | ||
| 175 | } else | 204 | } else |
| 176 | local_irq_enable(); | 205 | local_irq_enable(); |
| 177 | } | 206 | } |
| @@ -183,9 +212,13 @@ static void mwait_idle(void) | |||
| 183 | */ | 212 | */ |
| 184 | static void poll_idle(void) | 213 | static void poll_idle(void) |
| 185 | { | 214 | { |
| 215 | struct power_trace it; | ||
| 216 | |||
| 217 | trace_power_start(&it, POWER_CSTATE, 0); | ||
| 186 | local_irq_enable(); | 218 | local_irq_enable(); |
| 187 | while (!need_resched()) | 219 | while (!need_resched()) |
| 188 | cpu_relax(); | 220 | cpu_relax(); |
| 221 | trace_power_end(&it); | ||
| 189 | } | 222 | } |
| 190 | 223 | ||
| 191 | /* | 224 | /* |
| @@ -270,7 +303,7 @@ static void c1e_idle(void) | |||
| 270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 303 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
| 271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 304 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
| 272 | c1e_detected = 1; | 305 | c1e_detected = 1; |
| 273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 306 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
| 274 | mark_tsc_unstable("TSC halt in AMD C1E"); | 307 | mark_tsc_unstable("TSC halt in AMD C1E"); |
| 275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 308 | printk(KERN_INFO "System has AMD C1E enabled\n"); |
| 276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | 309 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0a1302fe6d45..3ba155d24884 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
| 39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
| 40 | #include <linux/dmi.h> | 40 | #include <linux/dmi.h> |
| 41 | #include <linux/ftrace.h> | ||
| 41 | 42 | ||
| 42 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
| 43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
| @@ -59,6 +60,7 @@ | |||
| 59 | #include <asm/idle.h> | 60 | #include <asm/idle.h> |
| 60 | #include <asm/syscalls.h> | 61 | #include <asm/syscalls.h> |
| 61 | #include <asm/smp.h> | 62 | #include <asm/smp.h> |
| 63 | #include <asm/ds.h> | ||
| 62 | 64 | ||
| 63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 65 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| 64 | 66 | ||
| @@ -250,14 +252,8 @@ void exit_thread(void) | |||
| 250 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 252 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
| 251 | put_cpu(); | 253 | put_cpu(); |
| 252 | } | 254 | } |
| 253 | #ifdef CONFIG_X86_DS | 255 | |
| 254 | /* Free any DS contexts that have not been properly released. */ | 256 | ds_exit_thread(current); |
| 255 | if (unlikely(current->thread.ds_ctx)) { | ||
| 256 | /* we clear debugctl to make sure DS is not used. */ | ||
| 257 | update_debugctlmsr(0); | ||
| 258 | ds_free(current->thread.ds_ctx); | ||
| 259 | } | ||
| 260 | #endif /* CONFIG_X86_DS */ | ||
| 261 | } | 257 | } |
| 262 | 258 | ||
| 263 | void flush_thread(void) | 259 | void flush_thread(void) |
| @@ -339,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
| 339 | kfree(p->thread.io_bitmap_ptr); | 335 | kfree(p->thread.io_bitmap_ptr); |
| 340 | p->thread.io_bitmap_max = 0; | 336 | p->thread.io_bitmap_max = 0; |
| 341 | } | 337 | } |
| 338 | |||
| 339 | ds_copy_thread(p, current); | ||
| 340 | |||
| 341 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
| 342 | p->thread.debugctlmsr = 0; | ||
| 343 | |||
| 342 | return err; | 344 | return err; |
| 343 | } | 345 | } |
| 344 | 346 | ||
| @@ -419,48 +421,19 @@ int set_tsc_mode(unsigned int val) | |||
| 419 | return 0; | 421 | return 0; |
| 420 | } | 422 | } |
| 421 | 423 | ||
| 422 | #ifdef CONFIG_X86_DS | ||
| 423 | static int update_debugctl(struct thread_struct *prev, | ||
| 424 | struct thread_struct *next, unsigned long debugctl) | ||
| 425 | { | ||
| 426 | unsigned long ds_prev = 0; | ||
| 427 | unsigned long ds_next = 0; | ||
| 428 | |||
| 429 | if (prev->ds_ctx) | ||
| 430 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 431 | if (next->ds_ctx) | ||
| 432 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 433 | |||
| 434 | if (ds_next != ds_prev) { | ||
| 435 | /* we clear debugctl to make sure DS | ||
| 436 | * is not in use when we change it */ | ||
| 437 | debugctl = 0; | ||
| 438 | update_debugctlmsr(0); | ||
| 439 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
| 440 | } | ||
| 441 | return debugctl; | ||
| 442 | } | ||
| 443 | #else | ||
| 444 | static int update_debugctl(struct thread_struct *prev, | ||
| 445 | struct thread_struct *next, unsigned long debugctl) | ||
| 446 | { | ||
| 447 | return debugctl; | ||
| 448 | } | ||
| 449 | #endif /* CONFIG_X86_DS */ | ||
| 450 | |||
| 451 | static noinline void | 424 | static noinline void |
| 452 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 425 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
| 453 | struct tss_struct *tss) | 426 | struct tss_struct *tss) |
| 454 | { | 427 | { |
| 455 | struct thread_struct *prev, *next; | 428 | struct thread_struct *prev, *next; |
| 456 | unsigned long debugctl; | ||
| 457 | 429 | ||
| 458 | prev = &prev_p->thread; | 430 | prev = &prev_p->thread; |
| 459 | next = &next_p->thread; | 431 | next = &next_p->thread; |
| 460 | 432 | ||
| 461 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); | 433 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
| 462 | 434 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
| 463 | if (next->debugctlmsr != debugctl) | 435 | ds_switch_to(prev_p, next_p); |
| 436 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
| 464 | update_debugctlmsr(next->debugctlmsr); | 437 | update_debugctlmsr(next->debugctlmsr); |
| 465 | 438 | ||
| 466 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 439 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
| @@ -482,15 +455,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 482 | hard_enable_TSC(); | 455 | hard_enable_TSC(); |
| 483 | } | 456 | } |
| 484 | 457 | ||
| 485 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 486 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
| 487 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
| 488 | |||
| 489 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
| 490 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
| 491 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 492 | |||
| 493 | |||
| 494 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 458 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
| 495 | /* | 459 | /* |
| 496 | * Disable the bitmap via an invalid offset. We still cache | 460 | * Disable the bitmap via an invalid offset. We still cache |
| @@ -548,7 +512,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 548 | * the task-switch, and shows up in ret_from_fork in entry.S, | 512 | * the task-switch, and shows up in ret_from_fork in entry.S, |
| 549 | * for example. | 513 | * for example. |
| 550 | */ | 514 | */ |
| 551 | struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 515 | __notrace_funcgraph struct task_struct * |
| 516 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
| 552 | { | 517 | { |
| 553 | struct thread_struct *prev = &prev_p->thread, | 518 | struct thread_struct *prev = &prev_p->thread, |
| 554 | *next = &next_p->thread; | 519 | *next = &next_p->thread; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c958120fb1b6..416fb9282f4f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
| 40 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
| 41 | #include <linux/io.h> | 41 | #include <linux/io.h> |
| 42 | #include <linux/ftrace.h> | ||
| 42 | 43 | ||
| 43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
| 44 | #include <asm/system.h> | 45 | #include <asm/system.h> |
| @@ -52,6 +53,7 @@ | |||
| 52 | #include <asm/ia32.h> | 53 | #include <asm/ia32.h> |
| 53 | #include <asm/idle.h> | 54 | #include <asm/idle.h> |
| 54 | #include <asm/syscalls.h> | 55 | #include <asm/syscalls.h> |
| 56 | #include <asm/ds.h> | ||
| 55 | 57 | ||
| 56 | asmlinkage extern void ret_from_fork(void); | 58 | asmlinkage extern void ret_from_fork(void); |
| 57 | 59 | ||
| @@ -235,14 +237,8 @@ void exit_thread(void) | |||
| 235 | t->io_bitmap_max = 0; | 237 | t->io_bitmap_max = 0; |
| 236 | put_cpu(); | 238 | put_cpu(); |
| 237 | } | 239 | } |
| 238 | #ifdef CONFIG_X86_DS | 240 | |
| 239 | /* Free any DS contexts that have not been properly released. */ | 241 | ds_exit_thread(current); |
| 240 | if (unlikely(t->ds_ctx)) { | ||
| 241 | /* we clear debugctl to make sure DS is not used. */ | ||
| 242 | update_debugctlmsr(0); | ||
| 243 | ds_free(t->ds_ctx); | ||
| 244 | } | ||
| 245 | #endif /* CONFIG_X86_DS */ | ||
| 246 | } | 242 | } |
| 247 | 243 | ||
| 248 | void flush_thread(void) | 244 | void flush_thread(void) |
| @@ -372,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
| 372 | if (err) | 368 | if (err) |
| 373 | goto out; | 369 | goto out; |
| 374 | } | 370 | } |
| 371 | |||
| 372 | ds_copy_thread(p, me); | ||
| 373 | |||
| 374 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
| 375 | p->thread.debugctlmsr = 0; | ||
| 376 | |||
| 375 | err = 0; | 377 | err = 0; |
| 376 | out: | 378 | out: |
| 377 | if (err && p->thread.io_bitmap_ptr) { | 379 | if (err && p->thread.io_bitmap_ptr) { |
| @@ -470,35 +472,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 470 | struct tss_struct *tss) | 472 | struct tss_struct *tss) |
| 471 | { | 473 | { |
| 472 | struct thread_struct *prev, *next; | 474 | struct thread_struct *prev, *next; |
| 473 | unsigned long debugctl; | ||
| 474 | 475 | ||
| 475 | prev = &prev_p->thread, | 476 | prev = &prev_p->thread, |
| 476 | next = &next_p->thread; | 477 | next = &next_p->thread; |
| 477 | 478 | ||
| 478 | debugctl = prev->debugctlmsr; | 479 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
| 479 | 480 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
| 480 | #ifdef CONFIG_X86_DS | 481 | ds_switch_to(prev_p, next_p); |
| 481 | { | 482 | else if (next->debugctlmsr != prev->debugctlmsr) |
| 482 | unsigned long ds_prev = 0, ds_next = 0; | ||
| 483 | |||
| 484 | if (prev->ds_ctx) | ||
| 485 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 486 | if (next->ds_ctx) | ||
| 487 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 488 | |||
| 489 | if (ds_next != ds_prev) { | ||
| 490 | /* | ||
| 491 | * We clear debugctl to make sure DS | ||
| 492 | * is not in use when we change it: | ||
| 493 | */ | ||
| 494 | debugctl = 0; | ||
| 495 | update_debugctlmsr(0); | ||
| 496 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
| 497 | } | ||
| 498 | } | ||
| 499 | #endif /* CONFIG_X86_DS */ | ||
| 500 | |||
| 501 | if (next->debugctlmsr != debugctl) | ||
| 502 | update_debugctlmsr(next->debugctlmsr); | 483 | update_debugctlmsr(next->debugctlmsr); |
| 503 | 484 | ||
| 504 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 485 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
| @@ -533,14 +514,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 533 | */ | 514 | */ |
| 534 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 515 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
| 535 | } | 516 | } |
| 536 | |||
| 537 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 538 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
| 539 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
| 540 | |||
| 541 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
| 542 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
| 543 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 544 | } | 517 | } |
| 545 | 518 | ||
| 546 | /* | 519 | /* |
| @@ -551,8 +524,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 551 | * - could test fs/gs bitsliced | 524 | * - could test fs/gs bitsliced |
| 552 | * | 525 | * |
| 553 | * Kprobes not supported here. Set the probe on schedule instead. | 526 | * Kprobes not supported here. Set the probe on schedule instead. |
| 527 | * The function graph tracer is not supported either. | ||
| 554 | */ | 528 | */ |
| 555 | struct task_struct * | 529 | __notrace_funcgraph struct task_struct * |
| 556 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 530 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
| 557 | { | 531 | { |
| 558 | struct thread_struct *prev = &prev_p->thread; | 532 | struct thread_struct *prev = &prev_p->thread; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a6d8c12e10d..0a5df5f82fb9 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -581,158 +581,91 @@ static int ioperm_get(struct task_struct *target, | |||
| 581 | } | 581 | } |
| 582 | 582 | ||
| 583 | #ifdef CONFIG_X86_PTRACE_BTS | 583 | #ifdef CONFIG_X86_PTRACE_BTS |
| 584 | /* | ||
| 585 | * The configuration for a particular BTS hardware implementation. | ||
| 586 | */ | ||
| 587 | struct bts_configuration { | ||
| 588 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
| 589 | unsigned char sizeof_bts; | ||
| 590 | /* the size of a field in the BTS record in bytes */ | ||
| 591 | unsigned char sizeof_field; | ||
| 592 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
| 593 | unsigned long debugctl_mask; | ||
| 594 | }; | ||
| 595 | static struct bts_configuration bts_cfg; | ||
| 596 | |||
| 597 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
| 598 | |||
| 599 | |||
| 600 | /* | ||
| 601 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 602 | * architectures vary in the size of those fields. | ||
| 603 | * - source linear address | ||
| 604 | * - destination linear address | ||
| 605 | * - flags | ||
| 606 | * | ||
| 607 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 608 | * architectures use 32bit pointers in 32bit mode. | ||
| 609 | * | ||
| 610 | * We compute the base address for the first 8 fields based on: | ||
| 611 | * - the field size stored in the DS configuration | ||
| 612 | * - the relative field position | ||
| 613 | * | ||
| 614 | * In order to store additional information in the BTS buffer, we use | ||
| 615 | * a special source address to indicate that the record requires | ||
| 616 | * special interpretation. | ||
| 617 | * | ||
| 618 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 619 | * was predicted; this is ignored. | ||
| 620 | */ | ||
| 621 | |||
| 622 | enum bts_field { | ||
| 623 | bts_from = 0, | ||
| 624 | bts_to, | ||
| 625 | bts_flags, | ||
| 626 | |||
| 627 | bts_escape = (unsigned long)-1, | ||
| 628 | bts_qual = bts_to, | ||
| 629 | bts_jiffies = bts_flags | ||
| 630 | }; | ||
| 631 | |||
| 632 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
| 633 | { | ||
| 634 | base += (bts_cfg.sizeof_field * field); | ||
| 635 | return *(unsigned long *)base; | ||
| 636 | } | ||
| 637 | |||
| 638 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | ||
| 639 | { | ||
| 640 | base += (bts_cfg.sizeof_field * field);; | ||
| 641 | (*(unsigned long *)base) = val; | ||
| 642 | } | ||
| 643 | |||
| 644 | /* | ||
| 645 | * Translate a BTS record from the raw format into the bts_struct format | ||
| 646 | * | ||
| 647 | * out (out): bts_struct interpretation | ||
| 648 | * raw: raw BTS record | ||
| 649 | */ | ||
| 650 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
| 651 | { | ||
| 652 | memset(out, 0, sizeof(*out)); | ||
| 653 | if (bts_get(raw, bts_from) == bts_escape) { | ||
| 654 | out->qualifier = bts_get(raw, bts_qual); | ||
| 655 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
| 656 | } else { | ||
| 657 | out->qualifier = BTS_BRANCH; | ||
| 658 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
| 659 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
| 660 | } | ||
| 661 | } | ||
| 662 | |||
| 663 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 584 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
| 664 | struct bts_struct __user *out) | 585 | struct bts_struct __user *out) |
| 665 | { | 586 | { |
| 666 | struct bts_struct ret; | 587 | const struct bts_trace *trace; |
| 667 | const void *bts_record; | 588 | struct bts_struct bts; |
| 668 | size_t bts_index, bts_end; | 589 | const unsigned char *at; |
| 669 | int error; | 590 | int error; |
| 670 | 591 | ||
| 671 | error = ds_get_bts_end(child, &bts_end); | 592 | trace = ds_read_bts(child->bts); |
| 672 | if (error < 0) | 593 | if (!trace) |
| 673 | return error; | 594 | return -EPERM; |
| 674 | |||
| 675 | if (bts_end <= index) | ||
| 676 | return -EINVAL; | ||
| 677 | 595 | ||
| 678 | error = ds_get_bts_index(child, &bts_index); | 596 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
| 679 | if (error < 0) | 597 | if ((void *)at < trace->ds.begin) |
| 680 | return error; | 598 | at += (trace->ds.n * trace->ds.size); |
| 681 | 599 | ||
| 682 | /* translate the ptrace bts index into the ds bts index */ | 600 | if (!trace->read) |
| 683 | bts_index += bts_end - (index + 1); | 601 | return -EOPNOTSUPP; |
| 684 | if (bts_end <= bts_index) | ||
| 685 | bts_index -= bts_end; | ||
| 686 | 602 | ||
| 687 | error = ds_access_bts(child, bts_index, &bts_record); | 603 | error = trace->read(child->bts, at, &bts); |
| 688 | if (error < 0) | 604 | if (error < 0) |
| 689 | return error; | 605 | return error; |
| 690 | 606 | ||
| 691 | ptrace_bts_translate_record(&ret, bts_record); | 607 | if (copy_to_user(out, &bts, sizeof(bts))) |
| 692 | |||
| 693 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
| 694 | return -EFAULT; | 608 | return -EFAULT; |
| 695 | 609 | ||
| 696 | return sizeof(ret); | 610 | return sizeof(bts); |
| 697 | } | 611 | } |
| 698 | 612 | ||
| 699 | static int ptrace_bts_drain(struct task_struct *child, | 613 | static int ptrace_bts_drain(struct task_struct *child, |
| 700 | long size, | 614 | long size, |
| 701 | struct bts_struct __user *out) | 615 | struct bts_struct __user *out) |
| 702 | { | 616 | { |
| 703 | struct bts_struct ret; | 617 | const struct bts_trace *trace; |
| 704 | const unsigned char *raw; | 618 | const unsigned char *at; |
| 705 | size_t end, i; | 619 | int error, drained = 0; |
| 706 | int error; | ||
| 707 | 620 | ||
| 708 | error = ds_get_bts_index(child, &end); | 621 | trace = ds_read_bts(child->bts); |
| 709 | if (error < 0) | 622 | if (!trace) |
| 710 | return error; | 623 | return -EPERM; |
| 711 | 624 | ||
| 712 | if (size < (end * sizeof(struct bts_struct))) | 625 | if (!trace->read) |
| 626 | return -EOPNOTSUPP; | ||
| 627 | |||
| 628 | if (size < (trace->ds.top - trace->ds.begin)) | ||
| 713 | return -EIO; | 629 | return -EIO; |
| 714 | 630 | ||
| 715 | error = ds_access_bts(child, 0, (const void **)&raw); | 631 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
| 716 | if (error < 0) | 632 | out++, drained++, at += trace->ds.size) { |
| 717 | return error; | 633 | struct bts_struct bts; |
| 634 | int error; | ||
| 718 | 635 | ||
| 719 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { | 636 | error = trace->read(child->bts, at, &bts); |
| 720 | ptrace_bts_translate_record(&ret, raw); | 637 | if (error < 0) |
| 638 | return error; | ||
| 721 | 639 | ||
| 722 | if (copy_to_user(out, &ret, sizeof(ret))) | 640 | if (copy_to_user(out, &bts, sizeof(bts))) |
| 723 | return -EFAULT; | 641 | return -EFAULT; |
| 724 | } | 642 | } |
| 725 | 643 | ||
| 726 | error = ds_clear_bts(child); | 644 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
| 645 | |||
| 646 | error = ds_reset_bts(child->bts); | ||
| 727 | if (error < 0) | 647 | if (error < 0) |
| 728 | return error; | 648 | return error; |
| 729 | 649 | ||
| 730 | return end; | 650 | return drained; |
| 731 | } | 651 | } |
| 732 | 652 | ||
| 733 | static void ptrace_bts_ovfl(struct task_struct *child) | 653 | static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) |
| 734 | { | 654 | { |
| 735 | send_sig(child->thread.bts_ovfl_signal, child, 0); | 655 | child->bts_buffer = alloc_locked_buffer(size); |
| 656 | if (!child->bts_buffer) | ||
| 657 | return -ENOMEM; | ||
| 658 | |||
| 659 | child->bts_size = size; | ||
| 660 | |||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | |||
| 664 | static void ptrace_bts_free_buffer(struct task_struct *child) | ||
| 665 | { | ||
| 666 | free_locked_buffer(child->bts_buffer, child->bts_size); | ||
| 667 | child->bts_buffer = NULL; | ||
| 668 | child->bts_size = 0; | ||
| 736 | } | 669 | } |
| 737 | 670 | ||
| 738 | static int ptrace_bts_config(struct task_struct *child, | 671 | static int ptrace_bts_config(struct task_struct *child, |
| @@ -740,114 +673,86 @@ static int ptrace_bts_config(struct task_struct *child, | |||
| 740 | const struct ptrace_bts_config __user *ucfg) | 673 | const struct ptrace_bts_config __user *ucfg) |
| 741 | { | 674 | { |
| 742 | struct ptrace_bts_config cfg; | 675 | struct ptrace_bts_config cfg; |
| 743 | int error = 0; | 676 | unsigned int flags = 0; |
| 744 | |||
| 745 | error = -EOPNOTSUPP; | ||
| 746 | if (!bts_cfg.sizeof_bts) | ||
| 747 | goto errout; | ||
| 748 | 677 | ||
| 749 | error = -EIO; | ||
| 750 | if (cfg_size < sizeof(cfg)) | 678 | if (cfg_size < sizeof(cfg)) |
| 751 | goto errout; | 679 | return -EIO; |
| 752 | 680 | ||
| 753 | error = -EFAULT; | ||
| 754 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 681 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
| 755 | goto errout; | 682 | return -EFAULT; |
| 756 | 683 | ||
| 757 | error = -EINVAL; | 684 | if (child->bts) { |
| 758 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && | 685 | ds_release_bts(child->bts); |
| 759 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | 686 | child->bts = NULL; |
| 760 | goto errout; | 687 | } |
| 761 | 688 | ||
| 762 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { | 689 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
| 763 | ds_ovfl_callback_t ovfl = NULL; | 690 | if (!cfg.signal) |
| 764 | unsigned int sig = 0; | 691 | return -EINVAL; |
| 765 | 692 | ||
| 766 | /* we ignore the error in case we were not tracing child */ | 693 | return -EOPNOTSUPP; |
| 767 | (void)ds_release_bts(child); | ||
| 768 | 694 | ||
| 769 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 695 | child->thread.bts_ovfl_signal = cfg.signal; |
| 770 | if (!cfg.signal) | 696 | } |
| 771 | goto errout; | ||
| 772 | 697 | ||
| 773 | sig = cfg.signal; | 698 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && |
| 774 | ovfl = ptrace_bts_ovfl; | 699 | (cfg.size != child->bts_size)) { |
| 775 | } | 700 | int error; |
| 776 | 701 | ||
| 777 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | 702 | ptrace_bts_free_buffer(child); |
| 778 | if (error < 0) | ||
| 779 | goto errout; | ||
| 780 | 703 | ||
| 781 | child->thread.bts_ovfl_signal = sig; | 704 | error = ptrace_bts_allocate_buffer(child, cfg.size); |
| 705 | if (error < 0) | ||
| 706 | return error; | ||
| 782 | } | 707 | } |
| 783 | 708 | ||
| 784 | error = -EINVAL; | ||
| 785 | if (!child->thread.ds_ctx && cfg.flags) | ||
| 786 | goto errout; | ||
| 787 | |||
| 788 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 709 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
| 789 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; | 710 | flags |= BTS_USER; |
| 790 | else | ||
| 791 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
| 792 | 711 | ||
| 793 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 712 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
| 794 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 713 | flags |= BTS_TIMESTAMPS; |
| 795 | else | ||
| 796 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
| 797 | 714 | ||
| 798 | error = sizeof(cfg); | 715 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, |
| 716 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | ||
| 717 | flags); | ||
| 718 | if (IS_ERR(child->bts)) { | ||
| 719 | int error = PTR_ERR(child->bts); | ||
| 799 | 720 | ||
| 800 | out: | 721 | ptrace_bts_free_buffer(child); |
| 801 | if (child->thread.debugctlmsr) | 722 | child->bts = NULL; |
| 802 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
| 803 | else | ||
| 804 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
| 805 | 723 | ||
| 806 | return error; | 724 | return error; |
| 725 | } | ||
| 807 | 726 | ||
| 808 | errout: | 727 | return sizeof(cfg); |
| 809 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
| 810 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
| 811 | goto out; | ||
| 812 | } | 728 | } |
| 813 | 729 | ||
| 814 | static int ptrace_bts_status(struct task_struct *child, | 730 | static int ptrace_bts_status(struct task_struct *child, |
| 815 | long cfg_size, | 731 | long cfg_size, |
| 816 | struct ptrace_bts_config __user *ucfg) | 732 | struct ptrace_bts_config __user *ucfg) |
| 817 | { | 733 | { |
| 734 | const struct bts_trace *trace; | ||
| 818 | struct ptrace_bts_config cfg; | 735 | struct ptrace_bts_config cfg; |
| 819 | size_t end; | ||
| 820 | const void *base, *max; | ||
| 821 | int error; | ||
| 822 | 736 | ||
| 823 | if (cfg_size < sizeof(cfg)) | 737 | if (cfg_size < sizeof(cfg)) |
| 824 | return -EIO; | 738 | return -EIO; |
| 825 | 739 | ||
| 826 | error = ds_get_bts_end(child, &end); | 740 | trace = ds_read_bts(child->bts); |
| 827 | if (error < 0) | 741 | if (!trace) |
| 828 | return error; | 742 | return -EPERM; |
| 829 | |||
| 830 | error = ds_access_bts(child, /* index = */ 0, &base); | ||
| 831 | if (error < 0) | ||
| 832 | return error; | ||
| 833 | |||
| 834 | error = ds_access_bts(child, /* index = */ end, &max); | ||
| 835 | if (error < 0) | ||
| 836 | return error; | ||
| 837 | 743 | ||
| 838 | memset(&cfg, 0, sizeof(cfg)); | 744 | memset(&cfg, 0, sizeof(cfg)); |
| 839 | cfg.size = (max - base); | 745 | cfg.size = trace->ds.end - trace->ds.begin; |
| 840 | cfg.signal = child->thread.bts_ovfl_signal; | 746 | cfg.signal = child->thread.bts_ovfl_signal; |
| 841 | cfg.bts_size = sizeof(struct bts_struct); | 747 | cfg.bts_size = sizeof(struct bts_struct); |
| 842 | 748 | ||
| 843 | if (cfg.signal) | 749 | if (cfg.signal) |
| 844 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 750 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
| 845 | 751 | ||
| 846 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 752 | if (trace->ds.flags & BTS_USER) |
| 847 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
| 848 | cfg.flags |= PTRACE_BTS_O_TRACE; | 753 | cfg.flags |= PTRACE_BTS_O_TRACE; |
| 849 | 754 | ||
| 850 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 755 | if (trace->ds.flags & BTS_TIMESTAMPS) |
| 851 | cfg.flags |= PTRACE_BTS_O_SCHED; | 756 | cfg.flags |= PTRACE_BTS_O_SCHED; |
| 852 | 757 | ||
| 853 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 758 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
| @@ -856,110 +761,77 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 856 | return sizeof(cfg); | 761 | return sizeof(cfg); |
| 857 | } | 762 | } |
| 858 | 763 | ||
| 859 | static int ptrace_bts_write_record(struct task_struct *child, | 764 | static int ptrace_bts_clear(struct task_struct *child) |
| 860 | const struct bts_struct *in) | ||
| 861 | { | 765 | { |
| 862 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; | 766 | const struct bts_trace *trace; |
| 863 | 767 | ||
| 864 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); | 768 | trace = ds_read_bts(child->bts); |
| 769 | if (!trace) | ||
| 770 | return -EPERM; | ||
| 865 | 771 | ||
| 866 | memset(bts_record, 0, bts_cfg.sizeof_bts); | 772 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
| 867 | switch (in->qualifier) { | ||
| 868 | case BTS_INVALID: | ||
| 869 | break; | ||
| 870 | 773 | ||
| 871 | case BTS_BRANCH: | 774 | return ds_reset_bts(child->bts); |
| 872 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); | 775 | } |
| 873 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
| 874 | break; | ||
| 875 | 776 | ||
| 876 | case BTS_TASK_ARRIVES: | 777 | static int ptrace_bts_size(struct task_struct *child) |
| 877 | case BTS_TASK_DEPARTS: | 778 | { |
| 878 | bts_set(bts_record, bts_from, bts_escape); | 779 | const struct bts_trace *trace; |
| 879 | bts_set(bts_record, bts_qual, in->qualifier); | ||
| 880 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); | ||
| 881 | break; | ||
| 882 | 780 | ||
| 883 | default: | 781 | trace = ds_read_bts(child->bts); |
| 884 | return -EINVAL; | 782 | if (!trace) |
| 885 | } | 783 | return -EPERM; |
| 886 | 784 | ||
| 887 | /* The writing task will be the switched-to task on a context | 785 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
| 888 | * switch. It needs to write into the switched-from task's BTS | ||
| 889 | * buffer. */ | ||
| 890 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); | ||
| 891 | } | 786 | } |
| 892 | 787 | ||
| 893 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 788 | static void ptrace_bts_fork(struct task_struct *tsk) |
| 894 | enum bts_qualifier qualifier) | ||
| 895 | { | 789 | { |
| 896 | struct bts_struct rec = { | 790 | tsk->bts = NULL; |
| 897 | .qualifier = qualifier, | 791 | tsk->bts_buffer = NULL; |
| 898 | .variant.jiffies = jiffies_64 | 792 | tsk->bts_size = 0; |
| 899 | }; | 793 | tsk->thread.bts_ovfl_signal = 0; |
| 900 | |||
| 901 | ptrace_bts_write_record(tsk, &rec); | ||
| 902 | } | 794 | } |
| 903 | 795 | ||
| 904 | static const struct bts_configuration bts_cfg_netburst = { | 796 | static void ptrace_bts_untrace(struct task_struct *child) |
| 905 | .sizeof_bts = sizeof(long) * 3, | 797 | { |
| 906 | .sizeof_field = sizeof(long), | 798 | if (unlikely(child->bts)) { |
| 907 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | 799 | ds_release_bts(child->bts); |
| 908 | }; | 800 | child->bts = NULL; |
| 801 | |||
| 802 | /* We cannot update total_vm and locked_vm since | ||
| 803 | child's mm is already gone. But we can reclaim the | ||
| 804 | memory. */ | ||
| 805 | kfree(child->bts_buffer); | ||
| 806 | child->bts_buffer = NULL; | ||
| 807 | child->bts_size = 0; | ||
| 808 | } | ||
| 809 | } | ||
| 909 | 810 | ||
| 910 | static const struct bts_configuration bts_cfg_pentium_m = { | 811 | static void ptrace_bts_detach(struct task_struct *child) |
| 911 | .sizeof_bts = sizeof(long) * 3, | 812 | { |
| 912 | .sizeof_field = sizeof(long), | 813 | if (unlikely(child->bts)) { |
| 913 | .debugctl_mask = (1<<6)|(1<<7) | 814 | ds_release_bts(child->bts); |
| 914 | }; | 815 | child->bts = NULL; |
| 915 | 816 | ||
| 916 | static const struct bts_configuration bts_cfg_core2 = { | 817 | ptrace_bts_free_buffer(child); |
| 917 | .sizeof_bts = 8 * 3, | 818 | } |
| 918 | .sizeof_field = 8, | 819 | } |
| 919 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | 820 | #else |
| 920 | }; | 821 | static inline void ptrace_bts_fork(struct task_struct *tsk) {} |
| 822 | static inline void ptrace_bts_detach(struct task_struct *child) {} | ||
| 823 | static inline void ptrace_bts_untrace(struct task_struct *child) {} | ||
| 824 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 921 | 825 | ||
| 922 | static inline void bts_configure(const struct bts_configuration *cfg) | 826 | void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) |
| 923 | { | 827 | { |
| 924 | bts_cfg = *cfg; | 828 | ptrace_bts_fork(child); |
| 925 | } | 829 | } |
| 926 | 830 | ||
| 927 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | 831 | void x86_ptrace_untrace(struct task_struct *child) |
| 928 | { | 832 | { |
| 929 | switch (c->x86) { | 833 | ptrace_bts_untrace(child); |
| 930 | case 0x6: | ||
| 931 | switch (c->x86_model) { | ||
| 932 | case 0xD: | ||
| 933 | case 0xE: /* Pentium M */ | ||
| 934 | bts_configure(&bts_cfg_pentium_m); | ||
| 935 | break; | ||
| 936 | case 0xF: /* Core2 */ | ||
| 937 | case 0x1C: /* Atom */ | ||
| 938 | bts_configure(&bts_cfg_core2); | ||
| 939 | break; | ||
| 940 | default: | ||
| 941 | /* sorry, don't know about them */ | ||
| 942 | break; | ||
| 943 | } | ||
| 944 | break; | ||
| 945 | case 0xF: | ||
| 946 | switch (c->x86_model) { | ||
| 947 | case 0x0: | ||
| 948 | case 0x1: | ||
| 949 | case 0x2: /* Netburst */ | ||
| 950 | bts_configure(&bts_cfg_netburst); | ||
| 951 | break; | ||
| 952 | default: | ||
| 953 | /* sorry, don't know about them */ | ||
| 954 | break; | ||
| 955 | } | ||
| 956 | break; | ||
| 957 | default: | ||
| 958 | /* sorry, don't know about them */ | ||
| 959 | break; | ||
| 960 | } | ||
| 961 | } | 834 | } |
| 962 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 963 | 835 | ||
| 964 | /* | 836 | /* |
| 965 | * Called by kernel/ptrace.c when detaching.. | 837 | * Called by kernel/ptrace.c when detaching.. |
| @@ -972,15 +844,7 @@ void ptrace_disable(struct task_struct *child) | |||
| 972 | #ifdef TIF_SYSCALL_EMU | 844 | #ifdef TIF_SYSCALL_EMU |
| 973 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 845 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
| 974 | #endif | 846 | #endif |
| 975 | #ifdef CONFIG_X86_PTRACE_BTS | 847 | ptrace_bts_detach(child); |
| 976 | (void)ds_release_bts(child); | ||
| 977 | |||
| 978 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
| 979 | if (!child->thread.debugctlmsr) | ||
| 980 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
| 981 | |||
| 982 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
| 983 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 984 | } | 848 | } |
| 985 | 849 | ||
| 986 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 850 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
| @@ -1112,7 +976,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1112 | break; | 976 | break; |
| 1113 | 977 | ||
| 1114 | case PTRACE_BTS_SIZE: | 978 | case PTRACE_BTS_SIZE: |
| 1115 | ret = ds_get_bts_index(child, /* pos = */ NULL); | 979 | ret = ptrace_bts_size(child); |
| 1116 | break; | 980 | break; |
| 1117 | 981 | ||
| 1118 | case PTRACE_BTS_GET: | 982 | case PTRACE_BTS_GET: |
| @@ -1121,7 +985,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1121 | break; | 985 | break; |
| 1122 | 986 | ||
| 1123 | case PTRACE_BTS_CLEAR: | 987 | case PTRACE_BTS_CLEAR: |
| 1124 | ret = ds_clear_bts(child); | 988 | ret = ptrace_bts_clear(child); |
| 1125 | break; | 989 | break; |
| 1126 | 990 | ||
| 1127 | case PTRACE_BTS_DRAIN: | 991 | case PTRACE_BTS_DRAIN: |
| @@ -1384,6 +1248,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
| 1384 | 1248 | ||
| 1385 | case PTRACE_GET_THREAD_AREA: | 1249 | case PTRACE_GET_THREAD_AREA: |
| 1386 | case PTRACE_SET_THREAD_AREA: | 1250 | case PTRACE_SET_THREAD_AREA: |
| 1251 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 1252 | case PTRACE_BTS_CONFIG: | ||
| 1253 | case PTRACE_BTS_STATUS: | ||
| 1254 | case PTRACE_BTS_SIZE: | ||
| 1255 | case PTRACE_BTS_GET: | ||
| 1256 | case PTRACE_BTS_CLEAR: | ||
| 1257 | case PTRACE_BTS_DRAIN: | ||
| 1258 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1387 | return arch_ptrace(child, request, addr, data); | 1259 | return arch_ptrace(child, request, addr, data); |
| 1388 | 1260 | ||
| 1389 | default: | 1261 | default: |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index cc5a2545dd41..61f718df6eec 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -21,6 +21,9 @@ | |||
| 21 | # include <asm/iommu.h> | 21 | # include <asm/iommu.h> |
| 22 | #endif | 22 | #endif |
| 23 | 23 | ||
| 24 | #include <mach_ipi.h> | ||
| 25 | |||
| 26 | |||
| 24 | /* | 27 | /* |
| 25 | * Power off function, if any | 28 | * Power off function, if any |
| 26 | */ | 29 | */ |
| @@ -36,7 +39,10 @@ int reboot_force; | |||
| 36 | static int reboot_cpu = -1; | 39 | static int reboot_cpu = -1; |
| 37 | #endif | 40 | #endif |
| 38 | 41 | ||
| 39 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | 42 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ |
| 43 | bool port_cf9_safe = false; | ||
| 44 | |||
| 45 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] | p[ci] [, [w]arm | [c]old] | ||
| 40 | warm Don't set the cold reboot flag | 46 | warm Don't set the cold reboot flag |
| 41 | cold Set the cold reboot flag | 47 | cold Set the cold reboot flag |
| 42 | bios Reboot by jumping through the BIOS (only for X86_32) | 48 | bios Reboot by jumping through the BIOS (only for X86_32) |
| @@ -45,6 +51,7 @@ static int reboot_cpu = -1; | |||
| 45 | kbd Use the keyboard controller. cold reset (default) | 51 | kbd Use the keyboard controller. cold reset (default) |
| 46 | acpi Use the RESET_REG in the FADT | 52 | acpi Use the RESET_REG in the FADT |
| 47 | efi Use efi reset_system runtime service | 53 | efi Use efi reset_system runtime service |
| 54 | pci Use the so-called "PCI reset register", CF9 | ||
| 48 | force Avoid anything that could hang. | 55 | force Avoid anything that could hang. |
| 49 | */ | 56 | */ |
| 50 | static int __init reboot_setup(char *str) | 57 | static int __init reboot_setup(char *str) |
| @@ -79,6 +86,7 @@ static int __init reboot_setup(char *str) | |||
| 79 | case 'k': | 86 | case 'k': |
| 80 | case 't': | 87 | case 't': |
| 81 | case 'e': | 88 | case 'e': |
| 89 | case 'p': | ||
| 82 | reboot_type = *str; | 90 | reboot_type = *str; |
| 83 | break; | 91 | break; |
| 84 | 92 | ||
| @@ -404,12 +412,27 @@ static void native_machine_emergency_restart(void) | |||
| 404 | reboot_type = BOOT_KBD; | 412 | reboot_type = BOOT_KBD; |
| 405 | break; | 413 | break; |
| 406 | 414 | ||
| 407 | |||
| 408 | case BOOT_EFI: | 415 | case BOOT_EFI: |
| 409 | if (efi_enabled) | 416 | if (efi_enabled) |
| 410 | efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, | 417 | efi.reset_system(reboot_mode ? |
| 418 | EFI_RESET_WARM : | ||
| 419 | EFI_RESET_COLD, | ||
| 411 | EFI_SUCCESS, 0, NULL); | 420 | EFI_SUCCESS, 0, NULL); |
| 421 | reboot_type = BOOT_KBD; | ||
| 422 | break; | ||
| 412 | 423 | ||
| 424 | case BOOT_CF9: | ||
| 425 | port_cf9_safe = true; | ||
| 426 | /* fall through */ | ||
| 427 | |||
| 428 | case BOOT_CF9_COND: | ||
| 429 | if (port_cf9_safe) { | ||
| 430 | u8 cf9 = inb(0xcf9) & ~6; | ||
| 431 | outb(cf9|2, 0xcf9); /* Request hard reset */ | ||
| 432 | udelay(50); | ||
| 433 | outb(cf9|6, 0xcf9); /* Actually do the reset */ | ||
| 434 | udelay(50); | ||
| 435 | } | ||
| 413 | reboot_type = BOOT_KBD; | 436 | reboot_type = BOOT_KBD; |
| 414 | break; | 437 | break; |
| 415 | } | 438 | } |
| @@ -470,6 +493,11 @@ static void native_machine_restart(char *__unused) | |||
| 470 | 493 | ||
| 471 | static void native_machine_halt(void) | 494 | static void native_machine_halt(void) |
| 472 | { | 495 | { |
| 496 | /* stop other cpus and apics */ | ||
| 497 | machine_shutdown(); | ||
| 498 | |||
| 499 | /* stop this cpu */ | ||
| 500 | stop_this_cpu(NULL); | ||
| 473 | } | 501 | } |
| 474 | 502 | ||
| 475 | static void native_machine_power_off(void) | 503 | static void native_machine_power_off(void) |
| @@ -523,3 +551,95 @@ void machine_crash_shutdown(struct pt_regs *regs) | |||
| 523 | machine_ops.crash_shutdown(regs); | 551 | machine_ops.crash_shutdown(regs); |
| 524 | } | 552 | } |
| 525 | #endif | 553 | #endif |
| 554 | |||
| 555 | |||
| 556 | #if defined(CONFIG_SMP) | ||
| 557 | |||
| 558 | /* This keeps track of which cpu is the crashing one. */ | ||
| 559 | static int crashing_cpu; | ||
| 560 | static nmi_shootdown_cb shootdown_callback; | ||
| 561 | |||
| 562 | static atomic_t waiting_for_crash_ipi; | ||
| 563 | |||
| 564 | static int crash_nmi_callback(struct notifier_block *self, | ||
| 565 | unsigned long val, void *data) | ||
| 566 | { | ||
| 567 | int cpu; | ||
| 568 | |||
| 569 | if (val != DIE_NMI_IPI) | ||
| 570 | return NOTIFY_OK; | ||
| 571 | |||
| 572 | cpu = raw_smp_processor_id(); | ||
| 573 | |||
| 574 | /* Don't do anything if this handler is invoked on the crashing cpu. | ||
| 575 | * Otherwise, the system will completely hang. The crashing cpu can get | ||
| 576 | * an NMI if the system was booted with the nmi_watchdog parameter. | ||
| 577 | */ | ||
| 578 | if (cpu == crashing_cpu) | ||
| 579 | return NOTIFY_STOP; | ||
| 580 | local_irq_disable(); | ||
| 581 | |||
| 582 | shootdown_callback(cpu, (struct die_args *)data); | ||
| 583 | |||
| 584 | atomic_dec(&waiting_for_crash_ipi); | ||
| 585 | /* Assume hlt works */ | ||
| 586 | halt(); | ||
| 587 | for (;;) | ||
| 588 | cpu_relax(); | ||
| 589 | |||
| 590 | return 1; | ||
| 591 | } | ||
| 592 | |||
| 593 | static void smp_send_nmi_allbutself(void) | ||
| 594 | { | ||
| 595 | cpumask_t mask = cpu_online_map; | ||
| 596 | cpu_clear(safe_smp_processor_id(), mask); | ||
| 597 | if (!cpus_empty(mask)) | ||
| 598 | send_IPI_mask(mask, NMI_VECTOR); | ||
| 599 | } | ||
| 600 | |||
| 601 | static struct notifier_block crash_nmi_nb = { | ||
| 602 | .notifier_call = crash_nmi_callback, | ||
| 603 | }; | ||
| 604 | |||
| 605 | /* Halt all other CPUs, calling the specified function on each of them | ||
| 606 | * | ||
| 607 | * This function can be used to halt all other CPUs on crash | ||
| 608 | * or emergency reboot time. The function passed as parameter | ||
| 609 | * will be called inside a NMI handler on all CPUs. | ||
| 610 | */ | ||
| 611 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | ||
| 612 | { | ||
| 613 | unsigned long msecs; | ||
| 614 | local_irq_disable(); | ||
| 615 | |||
| 616 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | ||
| 617 | crashing_cpu = safe_smp_processor_id(); | ||
| 618 | |||
| 619 | shootdown_callback = callback; | ||
| 620 | |||
| 621 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
| 622 | /* Would it be better to replace the trap vector here? */ | ||
| 623 | if (register_die_notifier(&crash_nmi_nb)) | ||
| 624 | return; /* return what? */ | ||
| 625 | /* Ensure the new callback function is set before sending | ||
| 626 | * out the NMI | ||
| 627 | */ | ||
| 628 | wmb(); | ||
| 629 | |||
| 630 | smp_send_nmi_allbutself(); | ||
| 631 | |||
| 632 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
| 633 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
| 634 | mdelay(1); | ||
| 635 | msecs--; | ||
| 636 | } | ||
| 637 | |||
| 638 | /* Leave the nmi callback set */ | ||
| 639 | } | ||
| 640 | #else /* !CONFIG_SMP */ | ||
| 641 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | ||
| 642 | { | ||
| 643 | /* No other CPUs to shoot down */ | ||
| 644 | } | ||
| 645 | #endif | ||
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 6f50664b2ba5..a160f3119725 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
| @@ -10,15 +10,12 @@ | |||
| 10 | #include <asm/page.h> | 10 | #include <asm/page.h> |
| 11 | #include <asm/kexec.h> | 11 | #include <asm/kexec.h> |
| 12 | #include <asm/processor-flags.h> | 12 | #include <asm/processor-flags.h> |
| 13 | #include <asm/pgtable.h> | ||
| 14 | 13 | ||
| 15 | /* | 14 | /* |
| 16 | * Must be relocatable PIC code callable as a C function | 15 | * Must be relocatable PIC code callable as a C function |
| 17 | */ | 16 | */ |
| 18 | 17 | ||
| 19 | #define PTR(x) (x << 2) | 18 | #define PTR(x) (x << 2) |
| 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | ||
| 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | ||
| 22 | 19 | ||
| 23 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE | 20 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE |
| 24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | 21 | * ~ control_page + PAGE_SIZE are used as data storage and stack for |
| @@ -39,7 +36,6 @@ | |||
| 39 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | 36 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) |
| 40 | 37 | ||
| 41 | .text | 38 | .text |
| 42 | .align PAGE_SIZE | ||
| 43 | .globl relocate_kernel | 39 | .globl relocate_kernel |
| 44 | relocate_kernel: | 40 | relocate_kernel: |
| 45 | /* Save the CPU context, used for jumping back */ | 41 | /* Save the CPU context, used for jumping back */ |
| @@ -60,117 +56,6 @@ relocate_kernel: | |||
| 60 | movl %cr4, %eax | 56 | movl %cr4, %eax |
| 61 | movl %eax, CR4(%edi) | 57 | movl %eax, CR4(%edi) |
| 62 | 58 | ||
| 63 | #ifdef CONFIG_X86_PAE | ||
| 64 | /* map the control page at its virtual address */ | ||
| 65 | |||
| 66 | movl PTR(VA_PGD)(%ebp), %edi | ||
| 67 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
| 68 | andl $0xc0000000, %eax | ||
| 69 | shrl $27, %eax | ||
| 70 | addl %edi, %eax | ||
| 71 | |||
| 72 | movl PTR(PA_PMD_0)(%ebp), %edx | ||
| 73 | orl $PAE_PGD_ATTR, %edx | ||
| 74 | movl %edx, (%eax) | ||
| 75 | |||
| 76 | movl PTR(VA_PMD_0)(%ebp), %edi | ||
| 77 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
| 78 | andl $0x3fe00000, %eax | ||
| 79 | shrl $18, %eax | ||
| 80 | addl %edi, %eax | ||
| 81 | |||
| 82 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
| 83 | orl $PAGE_ATTR, %edx | ||
| 84 | movl %edx, (%eax) | ||
| 85 | |||
| 86 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
| 87 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
| 88 | andl $0x001ff000, %eax | ||
| 89 | shrl $9, %eax | ||
| 90 | addl %edi, %eax | ||
| 91 | |||
| 92 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
| 93 | orl $PAGE_ATTR, %edx | ||
| 94 | movl %edx, (%eax) | ||
| 95 | |||
| 96 | /* identity map the control page at its physical address */ | ||
| 97 | |||
| 98 | movl PTR(VA_PGD)(%ebp), %edi | ||
| 99 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
| 100 | andl $0xc0000000, %eax | ||
| 101 | shrl $27, %eax | ||
| 102 | addl %edi, %eax | ||
| 103 | |||
| 104 | movl PTR(PA_PMD_1)(%ebp), %edx | ||
| 105 | orl $PAE_PGD_ATTR, %edx | ||
| 106 | movl %edx, (%eax) | ||
| 107 | |||
| 108 | movl PTR(VA_PMD_1)(%ebp), %edi | ||
| 109 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
| 110 | andl $0x3fe00000, %eax | ||
| 111 | shrl $18, %eax | ||
| 112 | addl %edi, %eax | ||
| 113 | |||
| 114 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
| 115 | orl $PAGE_ATTR, %edx | ||
| 116 | movl %edx, (%eax) | ||
| 117 | |||
| 118 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
| 119 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
| 120 | andl $0x001ff000, %eax | ||
| 121 | shrl $9, %eax | ||
| 122 | addl %edi, %eax | ||
| 123 | |||
| 124 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
| 125 | orl $PAGE_ATTR, %edx | ||
| 126 | movl %edx, (%eax) | ||
| 127 | #else | ||
| 128 | /* map the control page at its virtual address */ | ||
| 129 | |||
| 130 | movl PTR(VA_PGD)(%ebp), %edi | ||
| 131 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
| 132 | andl $0xffc00000, %eax | ||
| 133 | shrl $20, %eax | ||
| 134 | addl %edi, %eax | ||
| 135 | |||
| 136 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
| 137 | orl $PAGE_ATTR, %edx | ||
| 138 | movl %edx, (%eax) | ||
| 139 | |||
| 140 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
| 141 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
| 142 | andl $0x003ff000, %eax | ||
| 143 | shrl $10, %eax | ||
| 144 | addl %edi, %eax | ||
| 145 | |||
| 146 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
| 147 | orl $PAGE_ATTR, %edx | ||
| 148 | movl %edx, (%eax) | ||
| 149 | |||
| 150 | /* identity map the control page at its physical address */ | ||
| 151 | |||
| 152 | movl PTR(VA_PGD)(%ebp), %edi | ||
| 153 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
| 154 | andl $0xffc00000, %eax | ||
| 155 | shrl $20, %eax | ||
| 156 | addl %edi, %eax | ||
| 157 | |||
| 158 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
| 159 | orl $PAGE_ATTR, %edx | ||
| 160 | movl %edx, (%eax) | ||
| 161 | |||
| 162 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
| 163 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
| 164 | andl $0x003ff000, %eax | ||
| 165 | shrl $10, %eax | ||
| 166 | addl %edi, %eax | ||
| 167 | |||
| 168 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
| 169 | orl $PAGE_ATTR, %edx | ||
| 170 | movl %edx, (%eax) | ||
| 171 | #endif | ||
| 172 | |||
| 173 | relocate_new_kernel: | ||
| 174 | /* read the arguments and say goodbye to the stack */ | 59 | /* read the arguments and say goodbye to the stack */ |
| 175 | movl 20+4(%esp), %ebx /* page_list */ | 60 | movl 20+4(%esp), %ebx /* page_list */ |
| 176 | movl 20+8(%esp), %ebp /* list of pages */ | 61 | movl 20+8(%esp), %ebp /* list of pages */ |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bdec76e55594..08e02e8453c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -93,11 +93,13 @@ | |||
| 93 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
| 94 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
| 95 | #include <asm/iommu.h> | 95 | #include <asm/iommu.h> |
| 96 | #include <asm/gart.h> | ||
| 96 | #include <asm/mmu_context.h> | 97 | #include <asm/mmu_context.h> |
| 97 | #include <asm/proto.h> | 98 | #include <asm/proto.h> |
| 98 | 99 | ||
| 99 | #include <mach_apic.h> | 100 | #include <mach_apic.h> |
| 100 | #include <asm/paravirt.h> | 101 | #include <asm/paravirt.h> |
| 102 | #include <asm/hypervisor.h> | ||
| 101 | 103 | ||
| 102 | #include <asm/percpu.h> | 104 | #include <asm/percpu.h> |
| 103 | #include <asm/topology.h> | 105 | #include <asm/topology.h> |
| @@ -448,6 +450,7 @@ static void __init reserve_early_setup_data(void) | |||
| 448 | * @size: Size of the crashkernel memory to reserve. | 450 | * @size: Size of the crashkernel memory to reserve. |
| 449 | * Returns the base address on success, and -1ULL on failure. | 451 | * Returns the base address on success, and -1ULL on failure. |
| 450 | */ | 452 | */ |
| 453 | static | ||
| 451 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | 454 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
| 452 | { | 455 | { |
| 453 | const unsigned long long alignment = 16<<20; /* 16M */ | 456 | const unsigned long long alignment = 16<<20; /* 16M */ |
| @@ -583,161 +586,24 @@ static int __init setup_elfcorehdr(char *arg) | |||
| 583 | early_param("elfcorehdr", setup_elfcorehdr); | 586 | early_param("elfcorehdr", setup_elfcorehdr); |
| 584 | #endif | 587 | #endif |
| 585 | 588 | ||
| 586 | static struct x86_quirks default_x86_quirks __initdata; | 589 | static int __init default_update_genapic(void) |
| 587 | |||
| 588 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
| 589 | |||
| 590 | /* | ||
| 591 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
| 592 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
| 593 | * remaining free memory in that area and fill it with a distinct | ||
| 594 | * pattern. | ||
| 595 | */ | ||
| 596 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
| 597 | #define MAX_SCAN_AREAS 8 | ||
| 598 | |||
| 599 | static int __read_mostly memory_corruption_check = -1; | ||
| 600 | |||
| 601 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
| 602 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
| 603 | |||
| 604 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
| 605 | static int num_scan_areas; | ||
| 606 | |||
| 607 | |||
| 608 | static int set_corruption_check(char *arg) | ||
| 609 | { | ||
| 610 | char *end; | ||
| 611 | |||
| 612 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
| 613 | |||
| 614 | return (*end == 0) ? 0 : -EINVAL; | ||
| 615 | } | ||
| 616 | early_param("memory_corruption_check", set_corruption_check); | ||
| 617 | |||
| 618 | static int set_corruption_check_period(char *arg) | ||
| 619 | { | ||
| 620 | char *end; | ||
| 621 | |||
| 622 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
| 623 | |||
| 624 | return (*end == 0) ? 0 : -EINVAL; | ||
| 625 | } | ||
| 626 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
| 627 | |||
| 628 | static int set_corruption_check_size(char *arg) | ||
| 629 | { | 590 | { |
| 630 | char *end; | 591 | #ifdef CONFIG_X86_SMP |
| 631 | unsigned size; | 592 | # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) |
| 632 | 593 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; | |
| 633 | size = memparse(arg, &end); | 594 | # endif |
| 634 | |||
| 635 | if (*end == '\0') | ||
| 636 | corruption_check_size = size; | ||
| 637 | |||
| 638 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
| 639 | } | ||
| 640 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
| 641 | |||
| 642 | |||
| 643 | static void __init setup_bios_corruption_check(void) | ||
| 644 | { | ||
| 645 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
| 646 | |||
| 647 | if (memory_corruption_check == -1) { | ||
| 648 | memory_corruption_check = | ||
| 649 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
| 650 | 1 | ||
| 651 | #else | ||
| 652 | 0 | ||
| 653 | #endif | 595 | #endif |
| 654 | ; | ||
| 655 | } | ||
| 656 | |||
| 657 | if (corruption_check_size == 0) | ||
| 658 | memory_corruption_check = 0; | ||
| 659 | |||
| 660 | if (!memory_corruption_check) | ||
| 661 | return; | ||
| 662 | |||
| 663 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
| 664 | 596 | ||
| 665 | while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 597 | return 0; |
| 666 | u64 size; | ||
| 667 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
| 668 | |||
| 669 | if (addr == 0) | ||
| 670 | break; | ||
| 671 | |||
| 672 | if ((addr + size) > corruption_check_size) | ||
| 673 | size = corruption_check_size - addr; | ||
| 674 | |||
| 675 | if (size == 0) | ||
| 676 | break; | ||
| 677 | |||
| 678 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
| 679 | scan_areas[num_scan_areas].addr = addr; | ||
| 680 | scan_areas[num_scan_areas].size = size; | ||
| 681 | num_scan_areas++; | ||
| 682 | |||
| 683 | /* Assume we've already mapped this early memory */ | ||
| 684 | memset(__va(addr), 0, size); | ||
| 685 | |||
| 686 | addr += size; | ||
| 687 | } | ||
| 688 | |||
| 689 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
| 690 | num_scan_areas); | ||
| 691 | update_e820(); | ||
| 692 | } | ||
| 693 | |||
| 694 | static struct timer_list periodic_check_timer; | ||
| 695 | |||
| 696 | void check_for_bios_corruption(void) | ||
| 697 | { | ||
| 698 | int i; | ||
| 699 | int corruption = 0; | ||
| 700 | |||
| 701 | if (!memory_corruption_check) | ||
| 702 | return; | ||
| 703 | |||
| 704 | for(i = 0; i < num_scan_areas; i++) { | ||
| 705 | unsigned long *addr = __va(scan_areas[i].addr); | ||
| 706 | unsigned long size = scan_areas[i].size; | ||
| 707 | |||
| 708 | for(; size; addr++, size -= sizeof(unsigned long)) { | ||
| 709 | if (!*addr) | ||
| 710 | continue; | ||
| 711 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
| 712 | addr, __pa(addr), *addr); | ||
| 713 | corruption = 1; | ||
| 714 | *addr = 0; | ||
| 715 | } | ||
| 716 | } | ||
| 717 | |||
| 718 | WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
| 719 | } | ||
| 720 | |||
| 721 | static void periodic_check_for_corruption(unsigned long data) | ||
| 722 | { | ||
| 723 | check_for_bios_corruption(); | ||
| 724 | mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); | ||
| 725 | } | 598 | } |
| 726 | 599 | ||
| 727 | void start_periodic_check_for_corruption(void) | 600 | static struct x86_quirks default_x86_quirks __initdata = { |
| 728 | { | 601 | .update_genapic = default_update_genapic, |
| 729 | if (!memory_corruption_check || corruption_check_period == 0) | 602 | }; |
| 730 | return; | ||
| 731 | |||
| 732 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
| 733 | corruption_check_period); | ||
| 734 | 603 | ||
| 735 | init_timer(&periodic_check_timer); | 604 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; |
| 736 | periodic_check_timer.function = &periodic_check_for_corruption; | ||
| 737 | periodic_check_for_corruption(0); | ||
| 738 | } | ||
| 739 | #endif | ||
| 740 | 605 | ||
| 606 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
| 741 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 607 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
| 742 | { | 608 | { |
| 743 | printk(KERN_NOTICE | 609 | printk(KERN_NOTICE |
| @@ -749,6 +615,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | |||
| 749 | 615 | ||
| 750 | return 0; | 616 | return 0; |
| 751 | } | 617 | } |
| 618 | #endif | ||
| 752 | 619 | ||
| 753 | /* List of systems that have known low memory corruption BIOS problems */ | 620 | /* List of systems that have known low memory corruption BIOS problems */ |
| 754 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | 621 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { |
| @@ -907,6 +774,12 @@ void __init setup_arch(char **cmdline_p) | |||
| 907 | 774 | ||
| 908 | dmi_check_system(bad_bios_dmi_table); | 775 | dmi_check_system(bad_bios_dmi_table); |
| 909 | 776 | ||
| 777 | /* | ||
| 778 | * VMware detection requires dmi to be available, so this | ||
| 779 | * needs to be done after dmi_scan_machine, for the BP. | ||
| 780 | */ | ||
| 781 | init_hypervisor(&boot_cpu_data); | ||
| 782 | |||
| 910 | #ifdef CONFIG_X86_32 | 783 | #ifdef CONFIG_X86_32 |
| 911 | probe_roms(); | 784 | probe_roms(); |
| 912 | #endif | 785 | #endif |
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h deleted file mode 100644 index cc673aa55ce4..000000000000 --- a/arch/x86/kernel/sigframe.h +++ /dev/null | |||
| @@ -1,42 +0,0 @@ | |||
| 1 | #ifdef CONFIG_X86_32 | ||
| 2 | struct sigframe { | ||
| 3 | char __user *pretcode; | ||
| 4 | int sig; | ||
| 5 | struct sigcontext sc; | ||
| 6 | /* | ||
| 7 | * fpstate is unused. fpstate is moved/allocated after | ||
| 8 | * retcode[] below. This movement allows to have the FP state and the | ||
| 9 | * future state extensions (xsave) stay together. | ||
| 10 | * And at the same time retaining the unused fpstate, prevents changing | ||
| 11 | * the offset of extramask[] in the sigframe and thus prevent any | ||
| 12 | * legacy application accessing/modifying it. | ||
| 13 | */ | ||
| 14 | struct _fpstate fpstate_unused; | ||
| 15 | unsigned long extramask[_NSIG_WORDS-1]; | ||
| 16 | char retcode[8]; | ||
| 17 | /* fp state follows here */ | ||
| 18 | }; | ||
| 19 | |||
| 20 | struct rt_sigframe { | ||
| 21 | char __user *pretcode; | ||
| 22 | int sig; | ||
| 23 | struct siginfo __user *pinfo; | ||
| 24 | void __user *puc; | ||
| 25 | struct siginfo info; | ||
| 26 | struct ucontext uc; | ||
| 27 | char retcode[8]; | ||
| 28 | /* fp state follows here */ | ||
| 29 | }; | ||
| 30 | #else | ||
| 31 | struct rt_sigframe { | ||
| 32 | char __user *pretcode; | ||
| 33 | struct ucontext uc; | ||
| 34 | struct siginfo info; | ||
| 35 | /* fp state follows here */ | ||
| 36 | }; | ||
| 37 | |||
| 38 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 39 | sigset_t *set, struct pt_regs *regs); | ||
| 40 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
| 41 | sigset_t *set, struct pt_regs *regs); | ||
| 42 | #endif | ||
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal.c index d6dd057d0f22..89bb7668041d 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -1,36 +1,41 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
| 3 | * | 4 | * |
| 4 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | 5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson |
| 5 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
| 7 | * 2000-2002 x86-64 support by Andi Kleen | ||
| 6 | */ | 8 | */ |
| 7 | #include <linux/list.h> | ||
| 8 | 9 | ||
| 9 | #include <linux/personality.h> | 10 | #include <linux/sched.h> |
| 10 | #include <linux/binfmts.h> | 11 | #include <linux/mm.h> |
| 11 | #include <linux/suspend.h> | 12 | #include <linux/smp.h> |
| 12 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
| 13 | #include <linux/ptrace.h> | ||
| 14 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
| 15 | #include <linux/stddef.h> | ||
| 16 | #include <linux/unistd.h> | ||
| 17 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
| 17 | #include <linux/ptrace.h> | ||
| 20 | #include <linux/tracehook.h> | 18 | #include <linux/tracehook.h> |
| 21 | #include <linux/elf.h> | 19 | #include <linux/unistd.h> |
| 22 | #include <linux/smp.h> | 20 | #include <linux/stddef.h> |
| 23 | #include <linux/mm.h> | 21 | #include <linux/personality.h> |
| 22 | #include <linux/uaccess.h> | ||
| 24 | 23 | ||
| 25 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
| 26 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
| 27 | #include <asm/uaccess.h> | ||
| 28 | #include <asm/i387.h> | 26 | #include <asm/i387.h> |
| 29 | #include <asm/vdso.h> | 27 | #include <asm/vdso.h> |
| 28 | |||
| 29 | #ifdef CONFIG_X86_64 | ||
| 30 | #include <asm/proto.h> | ||
| 31 | #include <asm/ia32_unistd.h> | ||
| 32 | #include <asm/mce.h> | ||
| 33 | #endif /* CONFIG_X86_64 */ | ||
| 34 | |||
| 30 | #include <asm/syscall.h> | 35 | #include <asm/syscall.h> |
| 31 | #include <asm/syscalls.h> | 36 | #include <asm/syscalls.h> |
| 32 | 37 | ||
| 33 | #include "sigframe.h" | 38 | #include <asm/sigframe.h> |
| 34 | 39 | ||
| 35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 40 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
| 36 | 41 | ||
| @@ -45,74 +50,6 @@ | |||
| 45 | # define FIX_EFLAGS __FIX_EFLAGS | 50 | # define FIX_EFLAGS __FIX_EFLAGS |
| 46 | #endif | 51 | #endif |
| 47 | 52 | ||
| 48 | /* | ||
| 49 | * Atomically swap in the new signal mask, and wait for a signal. | ||
| 50 | */ | ||
| 51 | asmlinkage int | ||
| 52 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
| 53 | { | ||
| 54 | mask &= _BLOCKABLE; | ||
| 55 | spin_lock_irq(&current->sighand->siglock); | ||
| 56 | current->saved_sigmask = current->blocked; | ||
| 57 | siginitset(&current->blocked, mask); | ||
| 58 | recalc_sigpending(); | ||
| 59 | spin_unlock_irq(&current->sighand->siglock); | ||
| 60 | |||
| 61 | current->state = TASK_INTERRUPTIBLE; | ||
| 62 | schedule(); | ||
| 63 | set_restore_sigmask(); | ||
| 64 | |||
| 65 | return -ERESTARTNOHAND; | ||
| 66 | } | ||
| 67 | |||
| 68 | asmlinkage int | ||
| 69 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
| 70 | struct old_sigaction __user *oact) | ||
| 71 | { | ||
| 72 | struct k_sigaction new_ka, old_ka; | ||
| 73 | int ret; | ||
| 74 | |||
| 75 | if (act) { | ||
| 76 | old_sigset_t mask; | ||
| 77 | |||
| 78 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
| 79 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
| 80 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
| 81 | return -EFAULT; | ||
| 82 | |||
| 83 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
| 84 | __get_user(mask, &act->sa_mask); | ||
| 85 | siginitset(&new_ka.sa.sa_mask, mask); | ||
| 86 | } | ||
| 87 | |||
| 88 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
| 89 | |||
| 90 | if (!ret && oact) { | ||
| 91 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
| 92 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
| 93 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
| 94 | return -EFAULT; | ||
| 95 | |||
| 96 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
| 97 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
| 98 | } | ||
| 99 | |||
| 100 | return ret; | ||
| 101 | } | ||
| 102 | |||
| 103 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
| 104 | { | ||
| 105 | /* | ||
| 106 | * This is needed to make gcc realize it doesn't own the | ||
| 107 | * "struct pt_regs" | ||
| 108 | */ | ||
| 109 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
| 110 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
| 111 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
| 112 | |||
| 113 | return do_sigaltstack(uss, uoss, regs->sp); | ||
| 114 | } | ||
| 115 | |||
| 116 | #define COPY(x) { \ | 53 | #define COPY(x) { \ |
| 117 | err |= __get_user(regs->x, &sc->x); \ | 54 | err |= __get_user(regs->x, &sc->x); \ |
| 118 | } | 55 | } |
| @@ -123,7 +60,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
| 123 | regs->seg = tmp; \ | 60 | regs->seg = tmp; \ |
| 124 | } | 61 | } |
| 125 | 62 | ||
| 126 | #define COPY_SEG_STRICT(seg) { \ | 63 | #define COPY_SEG_CPL3(seg) { \ |
| 127 | unsigned short tmp; \ | 64 | unsigned short tmp; \ |
| 128 | err |= __get_user(tmp, &sc->seg); \ | 65 | err |= __get_user(tmp, &sc->seg); \ |
| 129 | regs->seg = tmp | 3; \ | 66 | regs->seg = tmp | 3; \ |
| @@ -135,9 +72,6 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
| 135 | loadsegment(seg, tmp); \ | 72 | loadsegment(seg, tmp); \ |
| 136 | } | 73 | } |
| 137 | 74 | ||
| 138 | /* | ||
| 139 | * Do a signal return; undo the signal stack. | ||
| 140 | */ | ||
| 141 | static int | 75 | static int |
| 142 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 76 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
| 143 | unsigned long *pax) | 77 | unsigned long *pax) |
| @@ -149,14 +83,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 149 | /* Always make any pending restarted system calls return -EINTR */ | 83 | /* Always make any pending restarted system calls return -EINTR */ |
| 150 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 84 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
| 151 | 85 | ||
| 86 | #ifdef CONFIG_X86_32 | ||
| 152 | GET_SEG(gs); | 87 | GET_SEG(gs); |
| 153 | COPY_SEG(fs); | 88 | COPY_SEG(fs); |
| 154 | COPY_SEG(es); | 89 | COPY_SEG(es); |
| 155 | COPY_SEG(ds); | 90 | COPY_SEG(ds); |
| 91 | #endif /* CONFIG_X86_32 */ | ||
| 92 | |||
| 156 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 93 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
| 157 | COPY(dx); COPY(cx); COPY(ip); | 94 | COPY(dx); COPY(cx); COPY(ip); |
| 158 | COPY_SEG_STRICT(cs); | 95 | |
| 159 | COPY_SEG_STRICT(ss); | 96 | #ifdef CONFIG_X86_64 |
| 97 | COPY(r8); | ||
| 98 | COPY(r9); | ||
| 99 | COPY(r10); | ||
| 100 | COPY(r11); | ||
| 101 | COPY(r12); | ||
| 102 | COPY(r13); | ||
| 103 | COPY(r14); | ||
| 104 | COPY(r15); | ||
| 105 | #endif /* CONFIG_X86_64 */ | ||
| 106 | |||
| 107 | #ifdef CONFIG_X86_32 | ||
| 108 | COPY_SEG_CPL3(cs); | ||
| 109 | COPY_SEG_CPL3(ss); | ||
| 110 | #else /* !CONFIG_X86_32 */ | ||
| 111 | /* Kernel saves and restores only the CS segment register on signals, | ||
| 112 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
| 113 | * App's signal handler can save/restore other segments if needed. */ | ||
| 114 | COPY_SEG_CPL3(cs); | ||
| 115 | #endif /* CONFIG_X86_32 */ | ||
| 160 | 116 | ||
| 161 | err |= __get_user(tmpflags, &sc->flags); | 117 | err |= __get_user(tmpflags, &sc->flags); |
| 162 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 118 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
| @@ -169,102 +125,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 169 | return err; | 125 | return err; |
| 170 | } | 126 | } |
| 171 | 127 | ||
| 172 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | ||
| 173 | { | ||
| 174 | struct sigframe __user *frame; | ||
| 175 | struct pt_regs *regs; | ||
| 176 | unsigned long ax; | ||
| 177 | sigset_t set; | ||
| 178 | |||
| 179 | regs = (struct pt_regs *) &__unused; | ||
| 180 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
| 181 | |||
| 182 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
| 183 | goto badframe; | ||
| 184 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
| 185 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
| 186 | sizeof(frame->extramask)))) | ||
| 187 | goto badframe; | ||
| 188 | |||
| 189 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
| 190 | spin_lock_irq(&current->sighand->siglock); | ||
| 191 | current->blocked = set; | ||
| 192 | recalc_sigpending(); | ||
| 193 | spin_unlock_irq(&current->sighand->siglock); | ||
| 194 | |||
| 195 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
| 196 | goto badframe; | ||
| 197 | return ax; | ||
| 198 | |||
| 199 | badframe: | ||
| 200 | if (show_unhandled_signals && printk_ratelimit()) { | ||
| 201 | printk("%s%s[%d] bad frame in sigreturn frame:" | ||
| 202 | "%p ip:%lx sp:%lx oeax:%lx", | ||
| 203 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
| 204 | current->comm, task_pid_nr(current), frame, regs->ip, | ||
| 205 | regs->sp, regs->orig_ax); | ||
| 206 | print_vma_addr(" in ", regs->ip); | ||
| 207 | printk(KERN_CONT "\n"); | ||
| 208 | } | ||
| 209 | |||
| 210 | force_sig(SIGSEGV, current); | ||
| 211 | |||
| 212 | return 0; | ||
| 213 | } | ||
| 214 | |||
| 215 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
| 216 | { | ||
| 217 | struct rt_sigframe __user *frame; | ||
| 218 | unsigned long ax; | ||
| 219 | sigset_t set; | ||
| 220 | |||
| 221 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
| 222 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
| 223 | goto badframe; | ||
| 224 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
| 225 | goto badframe; | ||
| 226 | |||
| 227 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
| 228 | spin_lock_irq(&current->sighand->siglock); | ||
| 229 | current->blocked = set; | ||
| 230 | recalc_sigpending(); | ||
| 231 | spin_unlock_irq(&current->sighand->siglock); | ||
| 232 | |||
| 233 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
| 234 | goto badframe; | ||
| 235 | |||
| 236 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
| 237 | goto badframe; | ||
| 238 | |||
| 239 | return ax; | ||
| 240 | |||
| 241 | badframe: | ||
| 242 | signal_fault(regs, frame, "rt_sigreturn"); | ||
| 243 | return 0; | ||
| 244 | } | ||
| 245 | |||
| 246 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
| 247 | { | ||
| 248 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
| 249 | |||
| 250 | return do_rt_sigreturn(regs); | ||
| 251 | } | ||
| 252 | |||
| 253 | /* | ||
| 254 | * Set up a signal frame. | ||
| 255 | */ | ||
| 256 | static int | 128 | static int |
| 257 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 129 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
| 258 | struct pt_regs *regs, unsigned long mask) | 130 | struct pt_regs *regs, unsigned long mask) |
| 259 | { | 131 | { |
| 260 | int tmp, err = 0; | 132 | int err = 0; |
| 261 | 133 | ||
| 262 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | 134 | #ifdef CONFIG_X86_32 |
| 263 | savesegment(gs, tmp); | 135 | { |
| 264 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | 136 | unsigned int tmp; |
| 265 | 137 | ||
| 138 | savesegment(gs, tmp); | ||
| 139 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | ||
| 140 | } | ||
| 141 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | ||
| 266 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); | 142 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); |
| 267 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); | 143 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); |
| 144 | #endif /* CONFIG_X86_32 */ | ||
| 145 | |||
| 268 | err |= __put_user(regs->di, &sc->di); | 146 | err |= __put_user(regs->di, &sc->di); |
| 269 | err |= __put_user(regs->si, &sc->si); | 147 | err |= __put_user(regs->si, &sc->si); |
| 270 | err |= __put_user(regs->bp, &sc->bp); | 148 | err |= __put_user(regs->bp, &sc->bp); |
| @@ -273,19 +151,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
| 273 | err |= __put_user(regs->dx, &sc->dx); | 151 | err |= __put_user(regs->dx, &sc->dx); |
| 274 | err |= __put_user(regs->cx, &sc->cx); | 152 | err |= __put_user(regs->cx, &sc->cx); |
| 275 | err |= __put_user(regs->ax, &sc->ax); | 153 | err |= __put_user(regs->ax, &sc->ax); |
| 154 | #ifdef CONFIG_X86_64 | ||
| 155 | err |= __put_user(regs->r8, &sc->r8); | ||
| 156 | err |= __put_user(regs->r9, &sc->r9); | ||
| 157 | err |= __put_user(regs->r10, &sc->r10); | ||
| 158 | err |= __put_user(regs->r11, &sc->r11); | ||
| 159 | err |= __put_user(regs->r12, &sc->r12); | ||
| 160 | err |= __put_user(regs->r13, &sc->r13); | ||
| 161 | err |= __put_user(regs->r14, &sc->r14); | ||
| 162 | err |= __put_user(regs->r15, &sc->r15); | ||
| 163 | #endif /* CONFIG_X86_64 */ | ||
| 164 | |||
| 276 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 165 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
| 277 | err |= __put_user(current->thread.error_code, &sc->err); | 166 | err |= __put_user(current->thread.error_code, &sc->err); |
| 278 | err |= __put_user(regs->ip, &sc->ip); | 167 | err |= __put_user(regs->ip, &sc->ip); |
| 168 | #ifdef CONFIG_X86_32 | ||
| 279 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | 169 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); |
| 280 | err |= __put_user(regs->flags, &sc->flags); | 170 | err |= __put_user(regs->flags, &sc->flags); |
| 281 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 171 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
| 282 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | 172 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); |
| 173 | #else /* !CONFIG_X86_32 */ | ||
| 174 | err |= __put_user(regs->flags, &sc->flags); | ||
| 175 | err |= __put_user(regs->cs, &sc->cs); | ||
| 176 | err |= __put_user(0, &sc->gs); | ||
| 177 | err |= __put_user(0, &sc->fs); | ||
| 178 | #endif /* CONFIG_X86_32 */ | ||
| 283 | 179 | ||
| 284 | tmp = save_i387_xstate(fpstate); | 180 | err |= __put_user(fpstate, &sc->fpstate); |
| 285 | if (tmp < 0) | ||
| 286 | err = 1; | ||
| 287 | else | ||
| 288 | err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); | ||
| 289 | 181 | ||
| 290 | /* non-iBCS2 extensions.. */ | 182 | /* non-iBCS2 extensions.. */ |
| 291 | err |= __put_user(mask, &sc->oldmask); | 183 | err |= __put_user(mask, &sc->oldmask); |
| @@ -295,6 +187,32 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
| 295 | } | 187 | } |
| 296 | 188 | ||
| 297 | /* | 189 | /* |
| 190 | * Set up a signal frame. | ||
| 191 | */ | ||
| 192 | #ifdef CONFIG_X86_32 | ||
| 193 | static const struct { | ||
| 194 | u16 poplmovl; | ||
| 195 | u32 val; | ||
| 196 | u16 int80; | ||
| 197 | } __attribute__((packed)) retcode = { | ||
| 198 | 0xb858, /* popl %eax; movl $..., %eax */ | ||
| 199 | __NR_sigreturn, | ||
| 200 | 0x80cd, /* int $0x80 */ | ||
| 201 | }; | ||
| 202 | |||
| 203 | static const struct { | ||
| 204 | u8 movl; | ||
| 205 | u32 val; | ||
| 206 | u16 int80; | ||
| 207 | u8 pad; | ||
| 208 | } __attribute__((packed)) rt_retcode = { | ||
| 209 | 0xb8, /* movl $..., %eax */ | ||
| 210 | __NR_rt_sigreturn, | ||
| 211 | 0x80cd, /* int $0x80 */ | ||
| 212 | 0 | ||
| 213 | }; | ||
| 214 | |||
| 215 | /* | ||
| 298 | * Determine which stack to use.. | 216 | * Determine which stack to use.. |
| 299 | */ | 217 | */ |
| 300 | static inline void __user * | 218 | static inline void __user * |
| @@ -328,6 +246,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
| 328 | if (used_math()) { | 246 | if (used_math()) { |
| 329 | sp = sp - sig_xstate_size; | 247 | sp = sp - sig_xstate_size; |
| 330 | *fpstate = (struct _fpstate *) sp; | 248 | *fpstate = (struct _fpstate *) sp; |
| 249 | if (save_i387_xstate(*fpstate) < 0) | ||
| 250 | return (void __user *)-1L; | ||
| 331 | } | 251 | } |
| 332 | 252 | ||
| 333 | sp -= frame_size; | 253 | sp -= frame_size; |
| @@ -383,9 +303,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
| 383 | * reasons and because gdb uses it as a signature to notice | 303 | * reasons and because gdb uses it as a signature to notice |
| 384 | * signal handler stack frames. | 304 | * signal handler stack frames. |
| 385 | */ | 305 | */ |
| 386 | err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); | 306 | err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); |
| 387 | err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); | ||
| 388 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); | ||
| 389 | 307 | ||
| 390 | if (err) | 308 | if (err) |
| 391 | return -EFAULT; | 309 | return -EFAULT; |
| @@ -454,9 +372,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 454 | * reasons and because gdb uses it as a signature to notice | 372 | * reasons and because gdb uses it as a signature to notice |
| 455 | * signal handler stack frames. | 373 | * signal handler stack frames. |
| 456 | */ | 374 | */ |
| 457 | err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); | 375 | err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
| 458 | err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); | ||
| 459 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); | ||
| 460 | 376 | ||
| 461 | if (err) | 377 | if (err) |
| 462 | return -EFAULT; | 378 | return -EFAULT; |
| @@ -475,23 +391,293 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 475 | 391 | ||
| 476 | return 0; | 392 | return 0; |
| 477 | } | 393 | } |
| 394 | #else /* !CONFIG_X86_32 */ | ||
| 395 | /* | ||
| 396 | * Determine which stack to use.. | ||
| 397 | */ | ||
| 398 | static void __user * | ||
| 399 | get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) | ||
| 400 | { | ||
| 401 | /* Default to using normal stack - redzone*/ | ||
| 402 | sp -= 128; | ||
| 403 | |||
| 404 | /* This is the X/Open sanctioned signal stack switching. */ | ||
| 405 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
| 406 | if (sas_ss_flags(sp) == 0) | ||
| 407 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
| 408 | } | ||
| 409 | |||
| 410 | return (void __user *)round_down(sp - size, 64); | ||
| 411 | } | ||
| 412 | |||
| 413 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 414 | sigset_t *set, struct pt_regs *regs) | ||
| 415 | { | ||
| 416 | struct rt_sigframe __user *frame; | ||
| 417 | void __user *fp = NULL; | ||
| 418 | int err = 0; | ||
| 419 | struct task_struct *me = current; | ||
| 420 | |||
| 421 | if (used_math()) { | ||
| 422 | fp = get_stack(ka, regs->sp, sig_xstate_size); | ||
| 423 | frame = (void __user *)round_down( | ||
| 424 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
| 425 | |||
| 426 | if (save_i387_xstate(fp) < 0) | ||
| 427 | return -EFAULT; | ||
| 428 | } else | ||
| 429 | frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; | ||
| 430 | |||
| 431 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
| 432 | return -EFAULT; | ||
| 433 | |||
| 434 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
| 435 | if (copy_siginfo_to_user(&frame->info, info)) | ||
| 436 | return -EFAULT; | ||
| 437 | } | ||
| 438 | |||
| 439 | /* Create the ucontext. */ | ||
| 440 | if (cpu_has_xsave) | ||
| 441 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
| 442 | else | ||
| 443 | err |= __put_user(0, &frame->uc.uc_flags); | ||
| 444 | err |= __put_user(0, &frame->uc.uc_link); | ||
| 445 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
| 446 | err |= __put_user(sas_ss_flags(regs->sp), | ||
| 447 | &frame->uc.uc_stack.ss_flags); | ||
| 448 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
| 449 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
| 450 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
| 451 | |||
| 452 | /* Set up to return from userspace. If provided, use a stub | ||
| 453 | already in userspace. */ | ||
| 454 | /* x86-64 should always use SA_RESTORER. */ | ||
| 455 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
| 456 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
| 457 | } else { | ||
| 458 | /* could use a vstub here */ | ||
| 459 | return -EFAULT; | ||
| 460 | } | ||
| 461 | |||
| 462 | if (err) | ||
| 463 | return -EFAULT; | ||
| 464 | |||
| 465 | /* Set up registers for signal handler */ | ||
| 466 | regs->di = sig; | ||
| 467 | /* In case the signal handler was declared without prototypes */ | ||
| 468 | regs->ax = 0; | ||
| 469 | |||
| 470 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
| 471 | next argument after the signal number on the stack. */ | ||
| 472 | regs->si = (unsigned long)&frame->info; | ||
| 473 | regs->dx = (unsigned long)&frame->uc; | ||
| 474 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
| 475 | |||
| 476 | regs->sp = (unsigned long)frame; | ||
| 477 | |||
| 478 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
| 479 | even if the handler happens to be interrupting 32-bit code. */ | ||
| 480 | regs->cs = __USER_CS; | ||
| 481 | |||
| 482 | return 0; | ||
| 483 | } | ||
| 484 | #endif /* CONFIG_X86_32 */ | ||
| 485 | |||
| 486 | #ifdef CONFIG_X86_32 | ||
| 487 | /* | ||
| 488 | * Atomically swap in the new signal mask, and wait for a signal. | ||
| 489 | */ | ||
| 490 | asmlinkage int | ||
| 491 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
| 492 | { | ||
| 493 | mask &= _BLOCKABLE; | ||
| 494 | spin_lock_irq(&current->sighand->siglock); | ||
| 495 | current->saved_sigmask = current->blocked; | ||
| 496 | siginitset(&current->blocked, mask); | ||
| 497 | recalc_sigpending(); | ||
| 498 | spin_unlock_irq(&current->sighand->siglock); | ||
| 499 | |||
| 500 | current->state = TASK_INTERRUPTIBLE; | ||
| 501 | schedule(); | ||
| 502 | set_restore_sigmask(); | ||
| 503 | |||
| 504 | return -ERESTARTNOHAND; | ||
| 505 | } | ||
| 506 | |||
| 507 | asmlinkage int | ||
| 508 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
| 509 | struct old_sigaction __user *oact) | ||
| 510 | { | ||
| 511 | struct k_sigaction new_ka, old_ka; | ||
| 512 | int ret; | ||
| 513 | |||
| 514 | if (act) { | ||
| 515 | old_sigset_t mask; | ||
| 516 | |||
| 517 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
| 518 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
| 519 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
| 520 | return -EFAULT; | ||
| 521 | |||
| 522 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
| 523 | __get_user(mask, &act->sa_mask); | ||
| 524 | siginitset(&new_ka.sa.sa_mask, mask); | ||
| 525 | } | ||
| 526 | |||
| 527 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
| 528 | |||
| 529 | if (!ret && oact) { | ||
| 530 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
| 531 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
| 532 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
| 533 | return -EFAULT; | ||
| 534 | |||
| 535 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
| 536 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
| 537 | } | ||
| 538 | |||
| 539 | return ret; | ||
| 540 | } | ||
| 541 | #endif /* CONFIG_X86_32 */ | ||
| 542 | |||
| 543 | #ifdef CONFIG_X86_32 | ||
| 544 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
| 545 | { | ||
| 546 | /* | ||
| 547 | * This is needed to make gcc realize it doesn't own the | ||
| 548 | * "struct pt_regs" | ||
| 549 | */ | ||
| 550 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
| 551 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
| 552 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
| 553 | |||
| 554 | return do_sigaltstack(uss, uoss, regs->sp); | ||
| 555 | } | ||
| 556 | #else /* !CONFIG_X86_32 */ | ||
| 557 | asmlinkage long | ||
| 558 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
| 559 | struct pt_regs *regs) | ||
| 560 | { | ||
| 561 | return do_sigaltstack(uss, uoss, regs->sp); | ||
| 562 | } | ||
| 563 | #endif /* CONFIG_X86_32 */ | ||
| 564 | |||
| 565 | /* | ||
| 566 | * Do a signal return; undo the signal stack. | ||
| 567 | */ | ||
| 568 | #ifdef CONFIG_X86_32 | ||
| 569 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | ||
| 570 | { | ||
| 571 | struct sigframe __user *frame; | ||
| 572 | struct pt_regs *regs; | ||
| 573 | unsigned long ax; | ||
| 574 | sigset_t set; | ||
| 575 | |||
| 576 | regs = (struct pt_regs *) &__unused; | ||
| 577 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
| 578 | |||
| 579 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
| 580 | goto badframe; | ||
| 581 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
| 582 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
| 583 | sizeof(frame->extramask)))) | ||
| 584 | goto badframe; | ||
| 585 | |||
| 586 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
| 587 | spin_lock_irq(&current->sighand->siglock); | ||
| 588 | current->blocked = set; | ||
| 589 | recalc_sigpending(); | ||
| 590 | spin_unlock_irq(&current->sighand->siglock); | ||
| 591 | |||
| 592 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
| 593 | goto badframe; | ||
| 594 | return ax; | ||
| 595 | |||
| 596 | badframe: | ||
| 597 | signal_fault(regs, frame, "sigreturn"); | ||
| 598 | |||
| 599 | return 0; | ||
| 600 | } | ||
| 601 | #endif /* CONFIG_X86_32 */ | ||
| 602 | |||
| 603 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
| 604 | { | ||
| 605 | struct rt_sigframe __user *frame; | ||
| 606 | unsigned long ax; | ||
| 607 | sigset_t set; | ||
| 608 | |||
| 609 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
| 610 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
| 611 | goto badframe; | ||
| 612 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
| 613 | goto badframe; | ||
| 614 | |||
| 615 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
| 616 | spin_lock_irq(&current->sighand->siglock); | ||
| 617 | current->blocked = set; | ||
| 618 | recalc_sigpending(); | ||
| 619 | spin_unlock_irq(&current->sighand->siglock); | ||
| 620 | |||
| 621 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
| 622 | goto badframe; | ||
| 623 | |||
| 624 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
| 625 | goto badframe; | ||
| 626 | |||
| 627 | return ax; | ||
| 628 | |||
| 629 | badframe: | ||
| 630 | signal_fault(regs, frame, "rt_sigreturn"); | ||
| 631 | return 0; | ||
| 632 | } | ||
| 633 | |||
| 634 | #ifdef CONFIG_X86_32 | ||
| 635 | asmlinkage int sys_rt_sigreturn(struct pt_regs regs) | ||
| 636 | { | ||
| 637 | return do_rt_sigreturn(&regs); | ||
| 638 | } | ||
| 639 | #else /* !CONFIG_X86_32 */ | ||
| 640 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
| 641 | { | ||
| 642 | return do_rt_sigreturn(regs); | ||
| 643 | } | ||
| 644 | #endif /* CONFIG_X86_32 */ | ||
| 478 | 645 | ||
| 479 | /* | 646 | /* |
| 480 | * OK, we're invoking a handler: | 647 | * OK, we're invoking a handler: |
| 481 | */ | 648 | */ |
| 482 | static int signr_convert(int sig) | 649 | static int signr_convert(int sig) |
| 483 | { | 650 | { |
| 651 | #ifdef CONFIG_X86_32 | ||
| 484 | struct thread_info *info = current_thread_info(); | 652 | struct thread_info *info = current_thread_info(); |
| 485 | 653 | ||
| 486 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) | 654 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) |
| 487 | return info->exec_domain->signal_invmap[sig]; | 655 | return info->exec_domain->signal_invmap[sig]; |
| 656 | #endif /* CONFIG_X86_32 */ | ||
| 488 | return sig; | 657 | return sig; |
| 489 | } | 658 | } |
| 490 | 659 | ||
| 660 | #ifdef CONFIG_X86_32 | ||
| 661 | |||
| 491 | #define is_ia32 1 | 662 | #define is_ia32 1 |
| 492 | #define ia32_setup_frame __setup_frame | 663 | #define ia32_setup_frame __setup_frame |
| 493 | #define ia32_setup_rt_frame __setup_rt_frame | 664 | #define ia32_setup_rt_frame __setup_rt_frame |
| 494 | 665 | ||
| 666 | #else /* !CONFIG_X86_32 */ | ||
| 667 | |||
| 668 | #ifdef CONFIG_IA32_EMULATION | ||
| 669 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
| 670 | #else /* !CONFIG_IA32_EMULATION */ | ||
| 671 | #define is_ia32 0 | ||
| 672 | #endif /* CONFIG_IA32_EMULATION */ | ||
| 673 | |||
| 674 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 675 | sigset_t *set, struct pt_regs *regs); | ||
| 676 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
| 677 | sigset_t *set, struct pt_regs *regs); | ||
| 678 | |||
| 679 | #endif /* CONFIG_X86_32 */ | ||
| 680 | |||
| 495 | static int | 681 | static int |
| 496 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 682 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
| 497 | sigset_t *set, struct pt_regs *regs) | 683 | sigset_t *set, struct pt_regs *regs) |
| @@ -592,7 +778,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 592 | return 0; | 778 | return 0; |
| 593 | } | 779 | } |
| 594 | 780 | ||
| 781 | #ifdef CONFIG_X86_32 | ||
| 595 | #define NR_restart_syscall __NR_restart_syscall | 782 | #define NR_restart_syscall __NR_restart_syscall |
| 783 | #else /* !CONFIG_X86_32 */ | ||
| 784 | #define NR_restart_syscall \ | ||
| 785 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
| 786 | #endif /* CONFIG_X86_32 */ | ||
| 787 | |||
| 596 | /* | 788 | /* |
| 597 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 789 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
| 598 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 790 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
| @@ -704,8 +896,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
| 704 | struct task_struct *me = current; | 896 | struct task_struct *me = current; |
| 705 | 897 | ||
| 706 | if (show_unhandled_signals && printk_ratelimit()) { | 898 | if (show_unhandled_signals && printk_ratelimit()) { |
| 707 | printk(KERN_INFO | 899 | printk("%s" |
| 708 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 900 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
| 901 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
| 709 | me->comm, me->pid, where, frame, | 902 | me->comm, me->pid, where, frame, |
| 710 | regs->ip, regs->sp, regs->orig_ax); | 903 | regs->ip, regs->sp, regs->orig_ax); |
| 711 | print_vma_addr(" in ", regs->ip); | 904 | print_vma_addr(" in ", regs->ip); |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c deleted file mode 100644 index a5c9627f4db9..000000000000 --- a/arch/x86/kernel/signal_64.c +++ /dev/null | |||
| @@ -1,516 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
| 4 | * | ||
| 5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | ||
| 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | ||
| 7 | * 2000-2002 x86-64 support by Andi Kleen | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/mm.h> | ||
| 12 | #include <linux/smp.h> | ||
| 13 | #include <linux/kernel.h> | ||
| 14 | #include <linux/signal.h> | ||
| 15 | #include <linux/errno.h> | ||
| 16 | #include <linux/wait.h> | ||
| 17 | #include <linux/ptrace.h> | ||
| 18 | #include <linux/tracehook.h> | ||
| 19 | #include <linux/unistd.h> | ||
| 20 | #include <linux/stddef.h> | ||
| 21 | #include <linux/personality.h> | ||
| 22 | #include <linux/compiler.h> | ||
| 23 | #include <linux/uaccess.h> | ||
| 24 | |||
| 25 | #include <asm/processor.h> | ||
| 26 | #include <asm/ucontext.h> | ||
| 27 | #include <asm/i387.h> | ||
| 28 | #include <asm/proto.h> | ||
| 29 | #include <asm/ia32_unistd.h> | ||
| 30 | #include <asm/mce.h> | ||
| 31 | #include <asm/syscall.h> | ||
| 32 | #include <asm/syscalls.h> | ||
| 33 | #include "sigframe.h" | ||
| 34 | |||
| 35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
| 36 | |||
| 37 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | ||
| 38 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | ||
| 39 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | ||
| 40 | X86_EFLAGS_CF) | ||
| 41 | |||
| 42 | #ifdef CONFIG_X86_32 | ||
| 43 | # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) | ||
| 44 | #else | ||
| 45 | # define FIX_EFLAGS __FIX_EFLAGS | ||
| 46 | #endif | ||
| 47 | |||
| 48 | asmlinkage long | ||
| 49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
| 50 | struct pt_regs *regs) | ||
| 51 | { | ||
| 52 | return do_sigaltstack(uss, uoss, regs->sp); | ||
| 53 | } | ||
| 54 | |||
| 55 | #define COPY(x) { \ | ||
| 56 | err |= __get_user(regs->x, &sc->x); \ | ||
| 57 | } | ||
| 58 | |||
| 59 | #define COPY_SEG_STRICT(seg) { \ | ||
| 60 | unsigned short tmp; \ | ||
| 61 | err |= __get_user(tmp, &sc->seg); \ | ||
| 62 | regs->seg = tmp | 3; \ | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Do a signal return; undo the signal stack. | ||
| 67 | */ | ||
| 68 | static int | ||
| 69 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | ||
| 70 | unsigned long *pax) | ||
| 71 | { | ||
| 72 | void __user *buf; | ||
| 73 | unsigned int tmpflags; | ||
| 74 | unsigned int err = 0; | ||
| 75 | |||
| 76 | /* Always make any pending restarted system calls return -EINTR */ | ||
| 77 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | ||
| 78 | |||
| 79 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | ||
| 80 | COPY(dx); COPY(cx); COPY(ip); | ||
| 81 | COPY(r8); | ||
| 82 | COPY(r9); | ||
| 83 | COPY(r10); | ||
| 84 | COPY(r11); | ||
| 85 | COPY(r12); | ||
| 86 | COPY(r13); | ||
| 87 | COPY(r14); | ||
| 88 | COPY(r15); | ||
| 89 | |||
| 90 | /* Kernel saves and restores only the CS segment register on signals, | ||
| 91 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
| 92 | * App's signal handler can save/restore other segments if needed. */ | ||
| 93 | COPY_SEG_STRICT(cs); | ||
| 94 | |||
| 95 | err |= __get_user(tmpflags, &sc->flags); | ||
| 96 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
| 97 | regs->orig_ax = -1; /* disable syscall checks */ | ||
| 98 | |||
| 99 | err |= __get_user(buf, &sc->fpstate); | ||
| 100 | err |= restore_i387_xstate(buf); | ||
| 101 | |||
| 102 | err |= __get_user(*pax, &sc->ax); | ||
| 103 | return err; | ||
| 104 | } | ||
| 105 | |||
| 106 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
| 107 | { | ||
| 108 | struct rt_sigframe __user *frame; | ||
| 109 | unsigned long ax; | ||
| 110 | sigset_t set; | ||
| 111 | |||
| 112 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
| 113 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
| 114 | goto badframe; | ||
| 115 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
| 116 | goto badframe; | ||
| 117 | |||
| 118 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
| 119 | spin_lock_irq(&current->sighand->siglock); | ||
| 120 | current->blocked = set; | ||
| 121 | recalc_sigpending(); | ||
| 122 | spin_unlock_irq(&current->sighand->siglock); | ||
| 123 | |||
| 124 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
| 125 | goto badframe; | ||
| 126 | |||
| 127 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
| 128 | goto badframe; | ||
| 129 | |||
| 130 | return ax; | ||
| 131 | |||
| 132 | badframe: | ||
| 133 | signal_fault(regs, frame, "rt_sigreturn"); | ||
| 134 | return 0; | ||
| 135 | } | ||
| 136 | |||
| 137 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
| 138 | { | ||
| 139 | return do_rt_sigreturn(regs); | ||
| 140 | } | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Set up a signal frame. | ||
| 144 | */ | ||
| 145 | |||
| 146 | static inline int | ||
| 147 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, | ||
| 148 | unsigned long mask, struct task_struct *me) | ||
| 149 | { | ||
| 150 | int err = 0; | ||
| 151 | |||
| 152 | err |= __put_user(regs->cs, &sc->cs); | ||
| 153 | err |= __put_user(0, &sc->gs); | ||
| 154 | err |= __put_user(0, &sc->fs); | ||
| 155 | |||
| 156 | err |= __put_user(regs->di, &sc->di); | ||
| 157 | err |= __put_user(regs->si, &sc->si); | ||
| 158 | err |= __put_user(regs->bp, &sc->bp); | ||
| 159 | err |= __put_user(regs->sp, &sc->sp); | ||
| 160 | err |= __put_user(regs->bx, &sc->bx); | ||
| 161 | err |= __put_user(regs->dx, &sc->dx); | ||
| 162 | err |= __put_user(regs->cx, &sc->cx); | ||
| 163 | err |= __put_user(regs->ax, &sc->ax); | ||
| 164 | err |= __put_user(regs->r8, &sc->r8); | ||
| 165 | err |= __put_user(regs->r9, &sc->r9); | ||
| 166 | err |= __put_user(regs->r10, &sc->r10); | ||
| 167 | err |= __put_user(regs->r11, &sc->r11); | ||
| 168 | err |= __put_user(regs->r12, &sc->r12); | ||
| 169 | err |= __put_user(regs->r13, &sc->r13); | ||
| 170 | err |= __put_user(regs->r14, &sc->r14); | ||
| 171 | err |= __put_user(regs->r15, &sc->r15); | ||
| 172 | err |= __put_user(me->thread.trap_no, &sc->trapno); | ||
| 173 | err |= __put_user(me->thread.error_code, &sc->err); | ||
| 174 | err |= __put_user(regs->ip, &sc->ip); | ||
| 175 | err |= __put_user(regs->flags, &sc->flags); | ||
| 176 | err |= __put_user(mask, &sc->oldmask); | ||
| 177 | err |= __put_user(me->thread.cr2, &sc->cr2); | ||
| 178 | |||
| 179 | return err; | ||
| 180 | } | ||
| 181 | |||
| 182 | /* | ||
| 183 | * Determine which stack to use.. | ||
| 184 | */ | ||
| 185 | |||
| 186 | static void __user * | ||
| 187 | get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | ||
| 188 | { | ||
| 189 | unsigned long sp; | ||
| 190 | |||
| 191 | /* Default to using normal stack - redzone*/ | ||
| 192 | sp = regs->sp - 128; | ||
| 193 | |||
| 194 | /* This is the X/Open sanctioned signal stack switching. */ | ||
| 195 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
| 196 | if (sas_ss_flags(sp) == 0) | ||
| 197 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
| 198 | } | ||
| 199 | |||
| 200 | return (void __user *)round_down(sp - size, 64); | ||
| 201 | } | ||
| 202 | |||
| 203 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 204 | sigset_t *set, struct pt_regs *regs) | ||
| 205 | { | ||
| 206 | struct rt_sigframe __user *frame; | ||
| 207 | void __user *fp = NULL; | ||
| 208 | int err = 0; | ||
| 209 | struct task_struct *me = current; | ||
| 210 | |||
| 211 | if (used_math()) { | ||
| 212 | fp = get_stack(ka, regs, sig_xstate_size); | ||
| 213 | frame = (void __user *)round_down( | ||
| 214 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
| 215 | |||
| 216 | if (save_i387_xstate(fp) < 0) | ||
| 217 | return -EFAULT; | ||
| 218 | } else | ||
| 219 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | ||
| 220 | |||
| 221 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
| 222 | return -EFAULT; | ||
| 223 | |||
| 224 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
| 225 | if (copy_siginfo_to_user(&frame->info, info)) | ||
| 226 | return -EFAULT; | ||
| 227 | } | ||
| 228 | |||
| 229 | /* Create the ucontext. */ | ||
| 230 | if (cpu_has_xsave) | ||
| 231 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
| 232 | else | ||
| 233 | err |= __put_user(0, &frame->uc.uc_flags); | ||
| 234 | err |= __put_user(0, &frame->uc.uc_link); | ||
| 235 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
| 236 | err |= __put_user(sas_ss_flags(regs->sp), | ||
| 237 | &frame->uc.uc_stack.ss_flags); | ||
| 238 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
| 239 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | ||
| 240 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | ||
| 241 | if (sizeof(*set) == 16) { | ||
| 242 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | ||
| 243 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | ||
| 244 | } else | ||
| 245 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
| 246 | |||
| 247 | /* Set up to return from userspace. If provided, use a stub | ||
| 248 | already in userspace. */ | ||
| 249 | /* x86-64 should always use SA_RESTORER. */ | ||
| 250 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
| 251 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
| 252 | } else { | ||
| 253 | /* could use a vstub here */ | ||
| 254 | return -EFAULT; | ||
| 255 | } | ||
| 256 | |||
| 257 | if (err) | ||
| 258 | return -EFAULT; | ||
| 259 | |||
| 260 | /* Set up registers for signal handler */ | ||
| 261 | regs->di = sig; | ||
| 262 | /* In case the signal handler was declared without prototypes */ | ||
| 263 | regs->ax = 0; | ||
| 264 | |||
| 265 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
| 266 | next argument after the signal number on the stack. */ | ||
| 267 | regs->si = (unsigned long)&frame->info; | ||
| 268 | regs->dx = (unsigned long)&frame->uc; | ||
| 269 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
| 270 | |||
| 271 | regs->sp = (unsigned long)frame; | ||
| 272 | |||
| 273 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
| 274 | even if the handler happens to be interrupting 32-bit code. */ | ||
| 275 | regs->cs = __USER_CS; | ||
| 276 | |||
| 277 | return 0; | ||
| 278 | } | ||
| 279 | |||
| 280 | /* | ||
| 281 | * OK, we're invoking a handler | ||
| 282 | */ | ||
| 283 | static int signr_convert(int sig) | ||
| 284 | { | ||
| 285 | return sig; | ||
| 286 | } | ||
| 287 | |||
| 288 | #ifdef CONFIG_IA32_EMULATION | ||
| 289 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
| 290 | #else | ||
| 291 | #define is_ia32 0 | ||
| 292 | #endif | ||
| 293 | |||
| 294 | static int | ||
| 295 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 296 | sigset_t *set, struct pt_regs *regs) | ||
| 297 | { | ||
| 298 | int usig = signr_convert(sig); | ||
| 299 | int ret; | ||
| 300 | |||
| 301 | /* Set up the stack frame */ | ||
| 302 | if (is_ia32) { | ||
| 303 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
| 304 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | ||
| 305 | else | ||
| 306 | ret = ia32_setup_frame(usig, ka, set, regs); | ||
| 307 | } else | ||
| 308 | ret = __setup_rt_frame(sig, ka, info, set, regs); | ||
| 309 | |||
| 310 | if (ret) { | ||
| 311 | force_sigsegv(sig, current); | ||
| 312 | return -EFAULT; | ||
| 313 | } | ||
| 314 | |||
| 315 | return ret; | ||
| 316 | } | ||
| 317 | |||
| 318 | static int | ||
| 319 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | ||
| 320 | sigset_t *oldset, struct pt_regs *regs) | ||
| 321 | { | ||
| 322 | int ret; | ||
| 323 | |||
| 324 | /* Are we from a system call? */ | ||
| 325 | if (syscall_get_nr(current, regs) >= 0) { | ||
| 326 | /* If so, check system call restarting.. */ | ||
| 327 | switch (syscall_get_error(current, regs)) { | ||
| 328 | case -ERESTART_RESTARTBLOCK: | ||
| 329 | case -ERESTARTNOHAND: | ||
| 330 | regs->ax = -EINTR; | ||
| 331 | break; | ||
| 332 | |||
| 333 | case -ERESTARTSYS: | ||
| 334 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
| 335 | regs->ax = -EINTR; | ||
| 336 | break; | ||
| 337 | } | ||
| 338 | /* fallthrough */ | ||
| 339 | case -ERESTARTNOINTR: | ||
| 340 | regs->ax = regs->orig_ax; | ||
| 341 | regs->ip -= 2; | ||
| 342 | break; | ||
| 343 | } | ||
| 344 | } | ||
| 345 | |||
| 346 | /* | ||
| 347 | * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF | ||
| 348 | * flag so that register information in the sigcontext is correct. | ||
| 349 | */ | ||
| 350 | if (unlikely(regs->flags & X86_EFLAGS_TF) && | ||
| 351 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | ||
| 352 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 353 | |||
| 354 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | ||
| 355 | |||
| 356 | if (ret) | ||
| 357 | return ret; | ||
| 358 | |||
| 359 | #ifdef CONFIG_X86_64 | ||
| 360 | /* | ||
| 361 | * This has nothing to do with segment registers, | ||
| 362 | * despite the name. This magic affects uaccess.h | ||
| 363 | * macros' behavior. Reset it to the normal setting. | ||
| 364 | */ | ||
| 365 | set_fs(USER_DS); | ||
| 366 | #endif | ||
| 367 | |||
| 368 | /* | ||
| 369 | * Clear the direction flag as per the ABI for function entry. | ||
| 370 | */ | ||
| 371 | regs->flags &= ~X86_EFLAGS_DF; | ||
| 372 | |||
| 373 | /* | ||
| 374 | * Clear TF when entering the signal handler, but | ||
| 375 | * notify any tracer that was single-stepping it. | ||
| 376 | * The tracer may want to single-step inside the | ||
| 377 | * handler too. | ||
| 378 | */ | ||
| 379 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 380 | |||
| 381 | spin_lock_irq(&current->sighand->siglock); | ||
| 382 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); | ||
| 383 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
| 384 | sigaddset(&current->blocked, sig); | ||
| 385 | recalc_sigpending(); | ||
| 386 | spin_unlock_irq(&current->sighand->siglock); | ||
| 387 | |||
| 388 | tracehook_signal_handler(sig, info, ka, regs, | ||
| 389 | test_thread_flag(TIF_SINGLESTEP)); | ||
| 390 | |||
| 391 | return 0; | ||
| 392 | } | ||
| 393 | |||
| 394 | #define NR_restart_syscall \ | ||
| 395 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
| 396 | /* | ||
| 397 | * Note that 'init' is a special process: it doesn't get signals it doesn't | ||
| 398 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | ||
| 399 | * mistake. | ||
| 400 | */ | ||
| 401 | static void do_signal(struct pt_regs *regs) | ||
| 402 | { | ||
| 403 | struct k_sigaction ka; | ||
| 404 | siginfo_t info; | ||
| 405 | int signr; | ||
| 406 | sigset_t *oldset; | ||
| 407 | |||
| 408 | /* | ||
| 409 | * We want the common case to go fast, which is why we may in certain | ||
| 410 | * cases get here from kernel mode. Just return without doing anything | ||
| 411 | * if so. | ||
| 412 | * X86_32: vm86 regs switched out by assembly code before reaching | ||
| 413 | * here, so testing against kernel CS suffices. | ||
| 414 | */ | ||
| 415 | if (!user_mode(regs)) | ||
| 416 | return; | ||
| 417 | |||
| 418 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
| 419 | oldset = &current->saved_sigmask; | ||
| 420 | else | ||
| 421 | oldset = &current->blocked; | ||
| 422 | |||
| 423 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
| 424 | if (signr > 0) { | ||
| 425 | /* | ||
| 426 | * Re-enable any watchpoints before delivering the | ||
| 427 | * signal to user space. The processor register will | ||
| 428 | * have been cleared if the watchpoint triggered | ||
| 429 | * inside the kernel. | ||
| 430 | */ | ||
| 431 | if (current->thread.debugreg7) | ||
| 432 | set_debugreg(current->thread.debugreg7, 7); | ||
| 433 | |||
| 434 | /* Whee! Actually deliver the signal. */ | ||
| 435 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | ||
| 436 | /* | ||
| 437 | * A signal was successfully delivered; the saved | ||
| 438 | * sigmask will have been stored in the signal frame, | ||
| 439 | * and will be restored by sigreturn, so we can simply | ||
| 440 | * clear the TS_RESTORE_SIGMASK flag. | ||
| 441 | */ | ||
| 442 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
| 443 | } | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | /* Did we come from a system call? */ | ||
| 448 | if (syscall_get_nr(current, regs) >= 0) { | ||
| 449 | /* Restart the system call - no handlers present */ | ||
| 450 | switch (syscall_get_error(current, regs)) { | ||
| 451 | case -ERESTARTNOHAND: | ||
| 452 | case -ERESTARTSYS: | ||
| 453 | case -ERESTARTNOINTR: | ||
| 454 | regs->ax = regs->orig_ax; | ||
| 455 | regs->ip -= 2; | ||
| 456 | break; | ||
| 457 | |||
| 458 | case -ERESTART_RESTARTBLOCK: | ||
| 459 | regs->ax = NR_restart_syscall; | ||
| 460 | regs->ip -= 2; | ||
| 461 | break; | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | /* | ||
| 466 | * If there's no signal to deliver, we just put the saved sigmask | ||
| 467 | * back. | ||
| 468 | */ | ||
| 469 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { | ||
| 470 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
| 471 | sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); | ||
| 472 | } | ||
| 473 | } | ||
| 474 | |||
| 475 | /* | ||
| 476 | * notification of userspace execution resumption | ||
| 477 | * - triggered by the TIF_WORK_MASK flags | ||
| 478 | */ | ||
| 479 | void | ||
| 480 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
| 481 | { | ||
| 482 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | ||
| 483 | /* notify userspace of pending MCEs */ | ||
| 484 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
| 485 | mce_notify_user(); | ||
| 486 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
| 487 | |||
| 488 | /* deal with pending signal delivery */ | ||
| 489 | if (thread_info_flags & _TIF_SIGPENDING) | ||
| 490 | do_signal(regs); | ||
| 491 | |||
| 492 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
| 493 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
| 494 | tracehook_notify_resume(regs); | ||
| 495 | } | ||
| 496 | |||
| 497 | #ifdef CONFIG_X86_32 | ||
| 498 | clear_thread_flag(TIF_IRET); | ||
| 499 | #endif /* CONFIG_X86_32 */ | ||
| 500 | } | ||
| 501 | |||
| 502 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | ||
| 503 | { | ||
| 504 | struct task_struct *me = current; | ||
| 505 | |||
| 506 | if (show_unhandled_signals && printk_ratelimit()) { | ||
| 507 | printk(KERN_INFO | ||
| 508 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | ||
| 509 | me->comm, me->pid, where, frame, | ||
| 510 | regs->ip, regs->sp, regs->orig_ax); | ||
| 511 | print_vma_addr(" in ", regs->ip); | ||
| 512 | printk(KERN_CONT "\n"); | ||
| 513 | } | ||
| 514 | |||
| 515 | force_sig(SIGSEGV, me); | ||
| 516 | } | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18f9b19f5f8f..7e558db362c1 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask) | |||
| 140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | static void stop_this_cpu(void *dummy) | ||
| 144 | { | ||
| 145 | local_irq_disable(); | ||
| 146 | /* | ||
| 147 | * Remove this CPU: | ||
| 148 | */ | ||
| 149 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
| 150 | disable_local_APIC(); | ||
| 151 | if (hlt_works(smp_processor_id())) | ||
| 152 | for (;;) halt(); | ||
| 153 | for (;;); | ||
| 154 | } | ||
| 155 | |||
| 156 | /* | 143 | /* |
| 157 | * this function calls the 'stop' function on all other CPUs in the system. | 144 | * this function calls the 'stop' function on all other CPUs in the system. |
| 158 | */ | 145 | */ |
| @@ -178,11 +165,7 @@ static void native_smp_send_stop(void) | |||
| 178 | void smp_reschedule_interrupt(struct pt_regs *regs) | 165 | void smp_reschedule_interrupt(struct pt_regs *regs) |
| 179 | { | 166 | { |
| 180 | ack_APIC_irq(); | 167 | ack_APIC_irq(); |
| 181 | #ifdef CONFIG_X86_32 | 168 | inc_irq_stat(irq_resched_count); |
| 182 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
| 183 | #else | ||
| 184 | add_pda(irq_resched_count, 1); | ||
| 185 | #endif | ||
| 186 | } | 169 | } |
| 187 | 170 | ||
| 188 | void smp_call_function_interrupt(struct pt_regs *regs) | 171 | void smp_call_function_interrupt(struct pt_regs *regs) |
| @@ -190,11 +173,7 @@ void smp_call_function_interrupt(struct pt_regs *regs) | |||
| 190 | ack_APIC_irq(); | 173 | ack_APIC_irq(); |
| 191 | irq_enter(); | 174 | irq_enter(); |
| 192 | generic_smp_call_function_interrupt(); | 175 | generic_smp_call_function_interrupt(); |
| 193 | #ifdef CONFIG_X86_32 | 176 | inc_irq_stat(irq_call_count); |
| 194 | __get_cpu_var(irq_stat).irq_call_count++; | ||
| 195 | #else | ||
| 196 | add_pda(irq_call_count, 1); | ||
| 197 | #endif | ||
| 198 | irq_exit(); | 177 | irq_exit(); |
| 199 | } | 178 | } |
| 200 | 179 | ||
| @@ -203,11 +182,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
| 203 | ack_APIC_irq(); | 182 | ack_APIC_irq(); |
| 204 | irq_enter(); | 183 | irq_enter(); |
| 205 | generic_smp_call_function_single_interrupt(); | 184 | generic_smp_call_function_single_interrupt(); |
| 206 | #ifdef CONFIG_X86_32 | 185 | inc_irq_stat(irq_call_count); |
| 207 | __get_cpu_var(irq_stat).irq_call_count++; | ||
| 208 | #else | ||
| 209 | add_pda(irq_call_count, 1); | ||
| 210 | #endif | ||
| 211 | irq_exit(); | 186 | irq_exit(); |
| 212 | } | 187 | } |
| 213 | 188 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f71f96fc9e62..f8500c969442 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -62,6 +62,7 @@ | |||
| 62 | #include <asm/mtrr.h> | 62 | #include <asm/mtrr.h> |
| 63 | #include <asm/vmi.h> | 63 | #include <asm/vmi.h> |
| 64 | #include <asm/genapic.h> | 64 | #include <asm/genapic.h> |
| 65 | #include <asm/setup.h> | ||
| 65 | #include <linux/mc146818rtc.h> | 66 | #include <linux/mc146818rtc.h> |
| 66 | 67 | ||
| 67 | #include <mach_apic.h> | 68 | #include <mach_apic.h> |
| @@ -287,7 +288,7 @@ static int __cpuinitdata unsafe_smp; | |||
| 287 | /* | 288 | /* |
| 288 | * Activate a secondary processor. | 289 | * Activate a secondary processor. |
| 289 | */ | 290 | */ |
| 290 | static void __cpuinit start_secondary(void *unused) | 291 | notrace static void __cpuinit start_secondary(void *unused) |
| 291 | { | 292 | { |
| 292 | /* | 293 | /* |
| 293 | * Don't put *anything* before cpu_init(), SMP booting is too | 294 | * Don't put *anything* before cpu_init(), SMP booting is too |
| @@ -534,7 +535,7 @@ static void impress_friends(void) | |||
| 534 | pr_debug("Before bogocount - setting activated=1.\n"); | 535 | pr_debug("Before bogocount - setting activated=1.\n"); |
| 535 | } | 536 | } |
| 536 | 537 | ||
| 537 | static inline void __inquire_remote_apic(int apicid) | 538 | void __inquire_remote_apic(int apicid) |
| 538 | { | 539 | { |
| 539 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 540 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
| 540 | char *names[] = { "ID", "VERSION", "SPIV" }; | 541 | char *names[] = { "ID", "VERSION", "SPIV" }; |
| @@ -573,14 +574,13 @@ static inline void __inquire_remote_apic(int apicid) | |||
| 573 | } | 574 | } |
| 574 | } | 575 | } |
| 575 | 576 | ||
| 576 | #ifdef WAKE_SECONDARY_VIA_NMI | ||
| 577 | /* | 577 | /* |
| 578 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal | 578 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal |
| 579 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this | 579 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this |
| 580 | * won't ... remember to clear down the APIC, etc later. | 580 | * won't ... remember to clear down the APIC, etc later. |
| 581 | */ | 581 | */ |
| 582 | static int __devinit | 582 | int __devinit |
| 583 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | 583 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) |
| 584 | { | 584 | { |
| 585 | unsigned long send_status, accept_status = 0; | 585 | unsigned long send_status, accept_status = 0; |
| 586 | int maxlvt; | 586 | int maxlvt; |
| @@ -597,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
| 597 | * Give the other CPU some time to accept the IPI. | 597 | * Give the other CPU some time to accept the IPI. |
| 598 | */ | 598 | */ |
| 599 | udelay(200); | 599 | udelay(200); |
| 600 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 600 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
| 601 | maxlvt = lapic_get_maxlvt(); | 601 | maxlvt = lapic_get_maxlvt(); |
| 602 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 602 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| 603 | apic_write(APIC_ESR, 0); | 603 | apic_write(APIC_ESR, 0); |
| @@ -612,11 +612,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
| 612 | 612 | ||
| 613 | return (send_status | accept_status); | 613 | return (send_status | accept_status); |
| 614 | } | 614 | } |
| 615 | #endif /* WAKE_SECONDARY_VIA_NMI */ | ||
| 616 | 615 | ||
| 617 | #ifdef WAKE_SECONDARY_VIA_INIT | 616 | int __devinit |
| 618 | static int __devinit | 617 | wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) |
| 619 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
| 620 | { | 618 | { |
| 621 | unsigned long send_status, accept_status = 0; | 619 | unsigned long send_status, accept_status = 0; |
| 622 | int maxlvt, num_starts, j; | 620 | int maxlvt, num_starts, j; |
| @@ -735,7 +733,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
| 735 | 733 | ||
| 736 | return (send_status | accept_status); | 734 | return (send_status | accept_status); |
| 737 | } | 735 | } |
| 738 | #endif /* WAKE_SECONDARY_VIA_INIT */ | ||
| 739 | 736 | ||
| 740 | struct create_idle { | 737 | struct create_idle { |
| 741 | struct work_struct work; | 738 | struct work_struct work; |
| @@ -1084,8 +1081,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
| 1084 | #endif | 1081 | #endif |
| 1085 | 1082 | ||
| 1086 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1083 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
| 1087 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1084 | printk(KERN_WARNING |
| 1088 | "by the BIOS.\n", hard_smp_processor_id()); | 1085 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
| 1086 | hard_smp_processor_id()); | ||
| 1087 | |||
| 1089 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 1088 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
| 1090 | } | 1089 | } |
| 1091 | 1090 | ||
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..10786af95545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/stacktrace.h> | 7 | #include <linux/stacktrace.h> |
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 9 | #include <linux/uaccess.h> | ||
| 9 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
| 10 | 11 | ||
| 11 | static void save_stack_warning(void *data, char *msg) | 12 | static void save_stack_warning(void *data, char *msg) |
| @@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | |||
| 83 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 84 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
| 84 | } | 85 | } |
| 85 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | 86 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); |
| 87 | |||
| 88 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | ||
| 89 | |||
| 90 | struct stack_frame { | ||
| 91 | const void __user *next_fp; | ||
| 92 | unsigned long ret_addr; | ||
| 93 | }; | ||
| 94 | |||
| 95 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
| 96 | { | ||
| 97 | int ret; | ||
| 98 | |||
| 99 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
| 100 | return 0; | ||
| 101 | |||
| 102 | ret = 1; | ||
| 103 | pagefault_disable(); | ||
| 104 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
| 105 | ret = 0; | ||
| 106 | pagefault_enable(); | ||
| 107 | |||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | |||
| 111 | static inline void __save_stack_trace_user(struct stack_trace *trace) | ||
| 112 | { | ||
| 113 | const struct pt_regs *regs = task_pt_regs(current); | ||
| 114 | const void __user *fp = (const void __user *)regs->bp; | ||
| 115 | |||
| 116 | if (trace->nr_entries < trace->max_entries) | ||
| 117 | trace->entries[trace->nr_entries++] = regs->ip; | ||
| 118 | |||
| 119 | while (trace->nr_entries < trace->max_entries) { | ||
| 120 | struct stack_frame frame; | ||
| 121 | |||
| 122 | frame.next_fp = NULL; | ||
| 123 | frame.ret_addr = 0; | ||
| 124 | if (!copy_stack_frame(fp, &frame)) | ||
| 125 | break; | ||
| 126 | if ((unsigned long)fp < regs->sp) | ||
| 127 | break; | ||
| 128 | if (frame.ret_addr) { | ||
| 129 | trace->entries[trace->nr_entries++] = | ||
| 130 | frame.ret_addr; | ||
| 131 | } | ||
| 132 | if (fp == frame.next_fp) | ||
| 133 | break; | ||
| 134 | fp = frame.next_fp; | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | void save_stack_trace_user(struct stack_trace *trace) | ||
| 139 | { | ||
| 140 | /* | ||
| 141 | * Trace user stack if we are not a kernel thread | ||
| 142 | */ | ||
| 143 | if (current->mm) { | ||
| 144 | __save_stack_trace_user(trace); | ||
| 145 | } | ||
| 146 | if (trace->nr_entries < trace->max_entries) | ||
| 147 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
| 148 | } | ||
| 149 | |||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 77b400f06ea2..65309e4cb1c0 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
| @@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc); | |||
| 75 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 75 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
| 76 | { | 76 | { |
| 77 | /* Keep nmi watchdog up to date */ | 77 | /* Keep nmi watchdog up to date */ |
| 78 | per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; | 78 | inc_irq_stat(irq0_irqs); |
| 79 | 79 | ||
| 80 | #ifdef CONFIG_X86_IO_APIC | 80 | #ifdef CONFIG_X86_IO_APIC |
| 81 | if (timer_ack) { | 81 | if (timer_ack) { |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index cb19d650c216..891e7a7c4334 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
| @@ -49,9 +49,9 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
| 49 | } | 49 | } |
| 50 | EXPORT_SYMBOL(profile_pc); | 50 | EXPORT_SYMBOL(profile_pc); |
| 51 | 51 | ||
| 52 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 52 | static irqreturn_t timer_interrupt(int irq, void *dev_id) |
| 53 | { | 53 | { |
| 54 | add_pda(irq0_irqs, 1); | 54 | inc_irq_stat(irq0_irqs); |
| 55 | 55 | ||
| 56 | global_clock_event->event_handler(global_clock_event); | 56 | global_clock_event->event_handler(global_clock_event); |
| 57 | 57 | ||
| @@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void) | |||
| 80 | break; | 80 | break; |
| 81 | no_ctr_free = (i == 4); | 81 | no_ctr_free = (i == 4); |
| 82 | if (no_ctr_free) { | 82 | if (no_ctr_free) { |
| 83 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
| 84 | "cpu_khz value may be incorrect.\n"); | ||
| 83 | i = 3; | 85 | i = 3; |
| 84 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | 86 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); |
| 85 | wrmsrl(MSR_K7_EVNTSEL3, 0); | 87 | wrmsrl(MSR_K7_EVNTSEL3, 0); |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index f4049f3513b6..8da059f949be 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
| @@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); | |||
| 34 | */ | 34 | */ |
| 35 | void leave_mm(int cpu) | 35 | void leave_mm(int cpu) |
| 36 | { | 36 | { |
| 37 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | 37 | BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); |
| 38 | BUG(); | 38 | cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); |
| 39 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | ||
| 40 | load_cr3(swapper_pg_dir); | 39 | load_cr3(swapper_pg_dir); |
| 41 | } | 40 | } |
| 42 | EXPORT_SYMBOL_GPL(leave_mm); | 41 | EXPORT_SYMBOL_GPL(leave_mm); |
| @@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) | |||
| 104 | * BUG(); | 103 | * BUG(); |
| 105 | */ | 104 | */ |
| 106 | 105 | ||
| 107 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | 106 | if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { |
| 108 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | 107 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { |
| 109 | if (flush_va == TLB_FLUSH_ALL) | 108 | if (flush_va == TLB_FLUSH_ALL) |
| 110 | local_flush_tlb(); | 109 | local_flush_tlb(); |
| 111 | else | 110 | else |
| @@ -119,7 +118,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) | |||
| 119 | smp_mb__after_clear_bit(); | 118 | smp_mb__after_clear_bit(); |
| 120 | out: | 119 | out: |
| 121 | put_cpu_no_resched(); | 120 | put_cpu_no_resched(); |
| 122 | __get_cpu_var(irq_stat).irq_tlb_count++; | 121 | inc_irq_stat(irq_tlb_count); |
| 123 | } | 122 | } |
| 124 | 123 | ||
| 125 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | 124 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
| @@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info) | |||
| 238 | unsigned long cpu = smp_processor_id(); | 237 | unsigned long cpu = smp_processor_id(); |
| 239 | 238 | ||
| 240 | __flush_tlb_all(); | 239 | __flush_tlb_all(); |
| 241 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) | 240 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) |
| 242 | leave_mm(cpu); | 241 | leave_mm(cpu); |
| 243 | } | 242 | } |
| 244 | 243 | ||
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 8f919ca69494..29887d7081a9 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c | |||
| @@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | |||
| 154 | out: | 154 | out: |
| 155 | ack_APIC_irq(); | 155 | ack_APIC_irq(); |
| 156 | cpu_clear(cpu, f->flush_cpumask); | 156 | cpu_clear(cpu, f->flush_cpumask); |
| 157 | add_pda(irq_tlb_count, 1); | 157 | inc_irq_stat(irq_tlb_count); |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | 160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 04431f34fd16..6a00e5faaa74 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
| @@ -566,14 +566,10 @@ static int __init uv_ptc_init(void) | |||
| 566 | if (!is_uv_system()) | 566 | if (!is_uv_system()) |
| 567 | return 0; | 567 | return 0; |
| 568 | 568 | ||
| 569 | if (!proc_mkdir("sgi_uv", NULL)) | ||
| 570 | return -EINVAL; | ||
| 571 | |||
| 572 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 569 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); |
| 573 | if (!proc_uv_ptc) { | 570 | if (!proc_uv_ptc) { |
| 574 | printk(KERN_ERR "unable to create %s proc entry\n", | 571 | printk(KERN_ERR "unable to create %s proc entry\n", |
| 575 | UV_PTC_BASENAME); | 572 | UV_PTC_BASENAME); |
| 576 | remove_proc_entry("sgi_uv", NULL); | ||
| 577 | return -EINVAL; | 573 | return -EINVAL; |
| 578 | } | 574 | } |
| 579 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | 575 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 1106fac6024d..808031a5ba19 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
| @@ -1,10 +1,26 @@ | |||
| 1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
| 2 | 2 | ||
| 3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
| 4 | #include <asm/e820.h> | ||
| 4 | 5 | ||
| 5 | /* ready for x86_64 and x86 */ | 6 | /* ready for x86_64 and x86 */ |
| 6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 7 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); |
| 7 | 8 | ||
| 9 | void __init reserve_trampoline_memory(void) | ||
| 10 | { | ||
| 11 | #ifdef CONFIG_X86_32 | ||
| 12 | /* | ||
| 13 | * But first pinch a few for the stack/trampoline stuff | ||
| 14 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
| 15 | * trampoline before removing it. (see the GDT stuff) | ||
| 16 | */ | ||
| 17 | reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
| 18 | #endif | ||
| 19 | /* Has to be in very low memory so we can execute real-mode AP code. */ | ||
| 20 | reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, | ||
| 21 | "TRAMPOLINE"); | ||
| 22 | } | ||
| 23 | |||
| 8 | /* | 24 | /* |
| 9 | * Currently trivial. Write the real->protected mode | 25 | * Currently trivial. Write the real->protected mode |
| 10 | * bootstrap into the page concerned. The caller | 26 | * bootstrap into the page concerned. The caller |
| @@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | |||
| 12 | */ | 28 | */ |
| 13 | unsigned long setup_trampoline(void) | 29 | unsigned long setup_trampoline(void) |
| 14 | { | 30 | { |
| 15 | memcpy(trampoline_base, trampoline_data, | 31 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
| 16 | trampoline_end - trampoline_data); | ||
| 17 | return virt_to_phys(trampoline_base); | 32 | return virt_to_phys(trampoline_base); |
| 18 | } | 33 | } |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 04d242ab0161..141907ab6e22 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
| 481 | { | 481 | { |
| 482 | nmi_enter(); | 482 | nmi_enter(); |
| 483 | 483 | ||
| 484 | #ifdef CONFIG_X86_32 | 484 | inc_irq_stat(__nmi_count); |
| 485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | ||
| 486 | #else | ||
| 487 | add_pda(__nmi_count, 1); | ||
| 488 | #endif | ||
| 489 | 485 | ||
| 490 | if (!ignore_nmis) | 486 | if (!ignore_nmis) |
| 491 | default_do_nmi(regs); | 487 | default_do_nmi(regs); |
| @@ -664,7 +660,7 @@ void math_error(void __user *ip) | |||
| 664 | { | 660 | { |
| 665 | struct task_struct *task; | 661 | struct task_struct *task; |
| 666 | siginfo_t info; | 662 | siginfo_t info; |
| 667 | unsigned short cwd, swd; | 663 | unsigned short cwd, swd, err; |
| 668 | 664 | ||
| 669 | /* | 665 | /* |
| 670 | * Save the info for the exception handler and clear the error. | 666 | * Save the info for the exception handler and clear the error. |
| @@ -675,7 +671,6 @@ void math_error(void __user *ip) | |||
| 675 | task->thread.error_code = 0; | 671 | task->thread.error_code = 0; |
| 676 | info.si_signo = SIGFPE; | 672 | info.si_signo = SIGFPE; |
| 677 | info.si_errno = 0; | 673 | info.si_errno = 0; |
| 678 | info.si_code = __SI_FAULT; | ||
| 679 | info.si_addr = ip; | 674 | info.si_addr = ip; |
| 680 | /* | 675 | /* |
| 681 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 676 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
| @@ -689,34 +684,31 @@ void math_error(void __user *ip) | |||
| 689 | */ | 684 | */ |
| 690 | cwd = get_fpu_cwd(task); | 685 | cwd = get_fpu_cwd(task); |
| 691 | swd = get_fpu_swd(task); | 686 | swd = get_fpu_swd(task); |
| 692 | switch (swd & ~cwd & 0x3f) { | 687 | |
| 693 | case 0x000: /* No unmasked exception */ | 688 | err = swd & ~cwd & 0x3f; |
| 689 | |||
| 694 | #ifdef CONFIG_X86_32 | 690 | #ifdef CONFIG_X86_32 |
| 691 | if (!err) | ||
| 695 | return; | 692 | return; |
| 696 | #endif | 693 | #endif |
| 697 | default: /* Multiple exceptions */ | 694 | |
| 698 | break; | 695 | if (err & 0x001) { /* Invalid op */ |
| 699 | case 0x001: /* Invalid Op */ | ||
| 700 | /* | 696 | /* |
| 701 | * swd & 0x240 == 0x040: Stack Underflow | 697 | * swd & 0x240 == 0x040: Stack Underflow |
| 702 | * swd & 0x240 == 0x240: Stack Overflow | 698 | * swd & 0x240 == 0x240: Stack Overflow |
| 703 | * User must clear the SF bit (0x40) if set | 699 | * User must clear the SF bit (0x40) if set |
| 704 | */ | 700 | */ |
| 705 | info.si_code = FPE_FLTINV; | 701 | info.si_code = FPE_FLTINV; |
| 706 | break; | 702 | } else if (err & 0x004) { /* Divide by Zero */ |
| 707 | case 0x002: /* Denormalize */ | ||
| 708 | case 0x010: /* Underflow */ | ||
| 709 | info.si_code = FPE_FLTUND; | ||
| 710 | break; | ||
| 711 | case 0x004: /* Zero Divide */ | ||
| 712 | info.si_code = FPE_FLTDIV; | 703 | info.si_code = FPE_FLTDIV; |
| 713 | break; | 704 | } else if (err & 0x008) { /* Overflow */ |
| 714 | case 0x008: /* Overflow */ | ||
| 715 | info.si_code = FPE_FLTOVF; | 705 | info.si_code = FPE_FLTOVF; |
| 716 | break; | 706 | } else if (err & 0x012) { /* Denormal, Underflow */ |
| 717 | case 0x020: /* Precision */ | 707 | info.si_code = FPE_FLTUND; |
| 708 | } else if (err & 0x020) { /* Precision */ | ||
| 718 | info.si_code = FPE_FLTRES; | 709 | info.si_code = FPE_FLTRES; |
| 719 | break; | 710 | } else { |
| 711 | info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ | ||
| 720 | } | 712 | } |
| 721 | force_sig_info(SIGFPE, &info, task); | 713 | force_sig_info(SIGFPE, &info, task); |
| 722 | } | 714 | } |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 424093b157d3..599e58168631 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <asm/vgtod.h> | 15 | #include <asm/vgtod.h> |
| 16 | #include <asm/time.h> | 16 | #include <asm/time.h> |
| 17 | #include <asm/delay.h> | 17 | #include <asm/delay.h> |
| 18 | #include <asm/hypervisor.h> | ||
| 18 | 19 | ||
| 19 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 20 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
| 20 | EXPORT_SYMBOL(cpu_khz); | 21 | EXPORT_SYMBOL(cpu_khz); |
| @@ -31,6 +32,7 @@ static int tsc_unstable; | |||
| 31 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 32 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
| 32 | static int tsc_disabled = -1; | 33 | static int tsc_disabled = -1; |
| 33 | 34 | ||
| 35 | static int tsc_clocksource_reliable; | ||
| 34 | /* | 36 | /* |
| 35 | * Scheduler clock - returns current time in nanosec units. | 37 | * Scheduler clock - returns current time in nanosec units. |
| 36 | */ | 38 | */ |
| @@ -98,6 +100,15 @@ int __init notsc_setup(char *str) | |||
| 98 | 100 | ||
| 99 | __setup("notsc", notsc_setup); | 101 | __setup("notsc", notsc_setup); |
| 100 | 102 | ||
| 103 | static int __init tsc_setup(char *str) | ||
| 104 | { | ||
| 105 | if (!strcmp(str, "reliable")) | ||
| 106 | tsc_clocksource_reliable = 1; | ||
| 107 | return 1; | ||
| 108 | } | ||
| 109 | |||
| 110 | __setup("tsc=", tsc_setup); | ||
| 111 | |||
| 101 | #define MAX_RETRIES 5 | 112 | #define MAX_RETRIES 5 |
| 102 | #define SMI_TRESHOLD 50000 | 113 | #define SMI_TRESHOLD 50000 |
| 103 | 114 | ||
| @@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void) | |||
| 352 | { | 363 | { |
| 353 | u64 tsc1, tsc2, delta, ref1, ref2; | 364 | u64 tsc1, tsc2, delta, ref1, ref2; |
| 354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 365 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
| 355 | unsigned long flags, latch, ms, fast_calibrate; | 366 | unsigned long flags, latch, ms, fast_calibrate, tsc_khz; |
| 356 | int hpet = is_hpet_enabled(), i, loopmin; | 367 | int hpet = is_hpet_enabled(), i, loopmin; |
| 357 | 368 | ||
| 369 | tsc_khz = get_hypervisor_tsc_freq(); | ||
| 370 | if (tsc_khz) { | ||
| 371 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | ||
| 372 | return tsc_khz; | ||
| 373 | } | ||
| 374 | |||
| 358 | local_irq_save(flags); | 375 | local_irq_save(flags); |
| 359 | fast_calibrate = quick_pit_calibrate(); | 376 | fast_calibrate = quick_pit_calibrate(); |
| 360 | local_irq_restore(flags); | 377 | local_irq_restore(flags); |
| @@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | |||
| 731 | {} | 748 | {} |
| 732 | }; | 749 | }; |
| 733 | 750 | ||
| 734 | /* | 751 | static void __init check_system_tsc_reliable(void) |
| 735 | * Geode_LX - the OLPC CPU has a possibly a very reliable TSC | 752 | { |
| 736 | */ | ||
| 737 | #ifdef CONFIG_MGEODE_LX | 753 | #ifdef CONFIG_MGEODE_LX |
| 738 | /* RTSC counts during suspend */ | 754 | /* RTSC counts during suspend */ |
| 739 | #define RTSC_SUSP 0x100 | 755 | #define RTSC_SUSP 0x100 |
| 740 | |||
| 741 | static void __init check_geode_tsc_reliable(void) | ||
| 742 | { | ||
| 743 | unsigned long res_low, res_high; | 756 | unsigned long res_low, res_high; |
| 744 | 757 | ||
| 745 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 758 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
| 759 | /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ | ||
| 746 | if (res_low & RTSC_SUSP) | 760 | if (res_low & RTSC_SUSP) |
| 747 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 761 | tsc_clocksource_reliable = 1; |
| 748 | } | ||
| 749 | #else | ||
| 750 | static inline void check_geode_tsc_reliable(void) { } | ||
| 751 | #endif | 762 | #endif |
| 763 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | ||
| 764 | tsc_clocksource_reliable = 1; | ||
| 765 | } | ||
| 752 | 766 | ||
| 753 | /* | 767 | /* |
| 754 | * Make an educated guess if the TSC is trustworthy and synchronized | 768 | * Make an educated guess if the TSC is trustworthy and synchronized |
| @@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void) | |||
| 783 | { | 797 | { |
| 784 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | 798 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, |
| 785 | clocksource_tsc.shift); | 799 | clocksource_tsc.shift); |
| 800 | if (tsc_clocksource_reliable) | ||
| 801 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
| 786 | /* lower the rating if we already know its unstable: */ | 802 | /* lower the rating if we already know its unstable: */ |
| 787 | if (check_tsc_unstable()) { | 803 | if (check_tsc_unstable()) { |
| 788 | clocksource_tsc.rating = 0; | 804 | clocksource_tsc.rating = 0; |
| @@ -843,7 +859,7 @@ void __init tsc_init(void) | |||
| 843 | if (unsynchronized_tsc()) | 859 | if (unsynchronized_tsc()) |
| 844 | mark_tsc_unstable("TSCs unsynchronized"); | 860 | mark_tsc_unstable("TSCs unsynchronized"); |
| 845 | 861 | ||
| 846 | check_geode_tsc_reliable(); | 862 | check_system_tsc_reliable(); |
| 847 | init_tsc_clocksource(); | 863 | init_tsc_clocksource(); |
| 848 | } | 864 | } |
| 849 | 865 | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 1c0dfbca87c1..bf36328f6ef9 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
| @@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
| 112 | if (unsynchronized_tsc()) | 112 | if (unsynchronized_tsc()) |
| 113 | return; | 113 | return; |
| 114 | 114 | ||
| 115 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | ||
| 116 | printk(KERN_INFO | ||
| 117 | "Skipping synchronization checks as TSC is reliable.\n"); | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | |||
| 115 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | 121 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", |
| 116 | smp_processor_id(), cpu); | 122 | smp_processor_id(), cpu); |
| 117 | 123 | ||
| @@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void) | |||
| 165 | { | 171 | { |
| 166 | int cpus = 2; | 172 | int cpus = 2; |
| 167 | 173 | ||
| 168 | if (unsynchronized_tsc()) | 174 | if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) |
| 169 | return; | 175 | return; |
| 170 | 176 | ||
| 171 | /* | 177 | /* |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 22fd6577156a..23206ba16874 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
| @@ -266,109 +266,6 @@ static void vmi_nop(void) | |||
| 266 | { | 266 | { |
| 267 | } | 267 | } |
| 268 | 268 | ||
| 269 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
| 270 | |||
| 271 | #ifdef CONFIG_X86_PAE | ||
| 272 | #define MAX_BOOT_PTS (2048+4+1) | ||
| 273 | #else | ||
| 274 | #define MAX_BOOT_PTS (1024+1) | ||
| 275 | #endif | ||
| 276 | |||
| 277 | /* | ||
| 278 | * During boot, mem_map is not yet available in paging_init, so stash | ||
| 279 | * all the boot page allocations here. | ||
| 280 | */ | ||
| 281 | static struct { | ||
| 282 | u32 pfn; | ||
| 283 | int type; | ||
| 284 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
| 285 | static int num_boot_page_allocations; | ||
| 286 | static int boot_allocations_applied; | ||
| 287 | |||
| 288 | void vmi_apply_boot_page_allocations(void) | ||
| 289 | { | ||
| 290 | int i; | ||
| 291 | BUG_ON(!mem_map); | ||
| 292 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
| 293 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
| 294 | page->type = boot_page_allocations[i].type; | ||
| 295 | page->type = boot_page_allocations[i].type & | ||
| 296 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
| 297 | } | ||
| 298 | boot_allocations_applied = 1; | ||
| 299 | } | ||
| 300 | |||
| 301 | static void record_page_type(u32 pfn, int type) | ||
| 302 | { | ||
| 303 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
| 304 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
| 305 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
| 306 | num_boot_page_allocations++; | ||
| 307 | } | ||
| 308 | |||
| 309 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
| 310 | { | ||
| 311 | u32 *ptr; | ||
| 312 | int i; | ||
| 313 | int limit = PAGE_SIZE / sizeof(int); | ||
| 314 | |||
| 315 | if (page_address(page)) | ||
| 316 | ptr = (u32 *)page_address(page); | ||
| 317 | else | ||
| 318 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
| 319 | /* | ||
| 320 | * When cloning the root in non-PAE mode, only the userspace | ||
| 321 | * pdes need to be zeroed. | ||
| 322 | */ | ||
| 323 | if (type & VMI_PAGE_CLONE) | ||
| 324 | limit = KERNEL_PGD_BOUNDARY; | ||
| 325 | for (i = 0; i < limit; i++) | ||
| 326 | BUG_ON(ptr[i]); | ||
| 327 | } | ||
| 328 | |||
| 329 | /* | ||
| 330 | * We stash the page type into struct page so we can verify the page | ||
| 331 | * types are used properly. | ||
| 332 | */ | ||
| 333 | static void vmi_set_page_type(u32 pfn, int type) | ||
| 334 | { | ||
| 335 | /* PAE can have multiple roots per page - don't track */ | ||
| 336 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
| 337 | return; | ||
| 338 | |||
| 339 | if (boot_allocations_applied) { | ||
| 340 | struct page *page = pfn_to_page(pfn); | ||
| 341 | if (type != VMI_PAGE_NORMAL) | ||
| 342 | BUG_ON(page->type); | ||
| 343 | else | ||
| 344 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
| 345 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
| 346 | if (type & VMI_PAGE_ZEROED) | ||
| 347 | check_zeroed_page(pfn, type, page); | ||
| 348 | } else { | ||
| 349 | record_page_type(pfn, type); | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | static void vmi_check_page_type(u32 pfn, int type) | ||
| 354 | { | ||
| 355 | /* PAE can have multiple roots per page - skip checks */ | ||
| 356 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
| 357 | return; | ||
| 358 | |||
| 359 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
| 360 | if (boot_allocations_applied) { | ||
| 361 | struct page *page = pfn_to_page(pfn); | ||
| 362 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
| 363 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
| 364 | BUG_ON((type & page->type) == 0); | ||
| 365 | } | ||
| 366 | } | ||
| 367 | #else | ||
| 368 | #define vmi_set_page_type(p,t) do { } while (0) | ||
| 369 | #define vmi_check_page_type(p,t) do { } while (0) | ||
| 370 | #endif | ||
| 371 | |||
| 372 | #ifdef CONFIG_HIGHPTE | 269 | #ifdef CONFIG_HIGHPTE |
| 373 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | 270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) |
| 374 | { | 271 | { |
| @@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
| 395 | 292 | ||
| 396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
| 397 | { | 294 | { |
| 398 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
| 399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
| 400 | } | 296 | } |
| 401 | 297 | ||
| @@ -406,27 +302,22 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | |||
| 406 | * It is called only for swapper_pg_dir, which already has | 302 | * It is called only for swapper_pg_dir, which already has |
| 407 | * data on it. | 303 | * data on it. |
| 408 | */ | 304 | */ |
| 409 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
| 410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 305 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
| 411 | } | 306 | } |
| 412 | 307 | ||
| 413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | 308 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
| 414 | { | 309 | { |
| 415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
| 416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
| 417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 310 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
| 418 | } | 311 | } |
| 419 | 312 | ||
| 420 | static void vmi_release_pte(unsigned long pfn) | 313 | static void vmi_release_pte(unsigned long pfn) |
| 421 | { | 314 | { |
| 422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 315 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
| 423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
| 424 | } | 316 | } |
| 425 | 317 | ||
| 426 | static void vmi_release_pmd(unsigned long pfn) | 318 | static void vmi_release_pmd(unsigned long pfn) |
| 427 | { | 319 | { |
| 428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 320 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
| 429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
| 430 | } | 321 | } |
| 431 | 322 | ||
| 432 | /* | 323 | /* |
| @@ -450,26 +341,22 @@ static void vmi_release_pmd(unsigned long pfn) | |||
| 450 | 341 | ||
| 451 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 342 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
| 452 | { | 343 | { |
| 453 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
| 454 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 344 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
| 455 | } | 345 | } |
| 456 | 346 | ||
| 457 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 347 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
| 458 | { | 348 | { |
| 459 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
| 460 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | 349 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); |
| 461 | } | 350 | } |
| 462 | 351 | ||
| 463 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | 352 | static void vmi_set_pte(pte_t *ptep, pte_t pte) |
| 464 | { | 353 | { |
| 465 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | 354 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ |
| 466 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD); | ||
| 467 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | 355 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); |
| 468 | } | 356 | } |
| 469 | 357 | ||
| 470 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 358 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
| 471 | { | 359 | { |
| 472 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
| 473 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 360 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
| 474 | } | 361 | } |
| 475 | 362 | ||
| @@ -477,10 +364,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
| 477 | { | 364 | { |
| 478 | #ifdef CONFIG_X86_PAE | 365 | #ifdef CONFIG_X86_PAE |
| 479 | const pte_t pte = { .pte = pmdval.pmd }; | 366 | const pte_t pte = { .pte = pmdval.pmd }; |
| 480 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
| 481 | #else | 367 | #else |
| 482 | const pte_t pte = { pmdval.pud.pgd.pgd }; | 368 | const pte_t pte = { pmdval.pud.pgd.pgd }; |
| 483 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
| 484 | #endif | 369 | #endif |
| 485 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | 370 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); |
| 486 | } | 371 | } |
| @@ -502,7 +387,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | |||
| 502 | 387 | ||
| 503 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 388 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
| 504 | { | 389 | { |
| 505 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
| 506 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); | 390 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); |
| 507 | } | 391 | } |
| 508 | 392 | ||
| @@ -510,21 +394,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval) | |||
| 510 | { | 394 | { |
| 511 | /* Um, eww */ | 395 | /* Um, eww */ |
| 512 | const pte_t pte = { .pte = pudval.pgd.pgd }; | 396 | const pte_t pte = { .pte = pudval.pgd.pgd }; |
| 513 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
| 514 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | 397 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); |
| 515 | } | 398 | } |
| 516 | 399 | ||
| 517 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 400 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
| 518 | { | 401 | { |
| 519 | const pte_t pte = { .pte = 0 }; | 402 | const pte_t pte = { .pte = 0 }; |
| 520 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
| 521 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 403 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
| 522 | } | 404 | } |
| 523 | 405 | ||
| 524 | static void vmi_pmd_clear(pmd_t *pmd) | 406 | static void vmi_pmd_clear(pmd_t *pmd) |
| 525 | { | 407 | { |
| 526 | const pte_t pte = { .pte = 0 }; | 408 | const pte_t pte = { .pte = 0 }; |
| 527 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
| 528 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | 409 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); |
| 529 | } | 410 | } |
| 530 | #endif | 411 | #endif |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc2..82c67559dde7 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
| @@ -44,6 +44,7 @@ SECTIONS | |||
| 44 | SCHED_TEXT | 44 | SCHED_TEXT |
| 45 | LOCK_TEXT | 45 | LOCK_TEXT |
| 46 | KPROBES_TEXT | 46 | KPROBES_TEXT |
| 47 | IRQENTRY_TEXT | ||
| 47 | *(.fixup) | 48 | *(.fixup) |
| 48 | *(.gnu.warning) | 49 | *(.gnu.warning) |
| 49 | _etext = .; /* End of text section */ | 50 | _etext = .; /* End of text section */ |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405b..1a614c0e6bef 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
| @@ -35,6 +35,7 @@ SECTIONS | |||
| 35 | SCHED_TEXT | 35 | SCHED_TEXT |
| 36 | LOCK_TEXT | 36 | LOCK_TEXT |
| 37 | KPROBES_TEXT | 37 | KPROBES_TEXT |
| 38 | IRQENTRY_TEXT | ||
| 38 | *(.fixup) | 39 | *(.fixup) |
| 39 | *(.gnu.warning) | 40 | *(.gnu.warning) |
| 40 | _etext = .; /* End of text section */ | 41 | _etext = .; /* End of text section */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..44153afc9067 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
| @@ -17,6 +17,9 @@ | |||
| 17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. |
| 18 | */ | 18 | */ |
| 19 | 19 | ||
| 20 | /* Disable profiling for userspace code: */ | ||
| 21 | #define DISABLE_BRANCH_PROFILING | ||
| 22 | |||
| 20 | #include <linux/time.h> | 23 | #include <linux/time.h> |
| 21 | #include <linux/init.h> | 24 | #include <linux/init.h> |
| 22 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
| @@ -128,7 +131,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
| 128 | gettimeofday(tv,NULL); | 131 | gettimeofday(tv,NULL); |
| 129 | return; | 132 | return; |
| 130 | } | 133 | } |
| 134 | |||
| 135 | /* | ||
| 136 | * Surround the RDTSC by barriers, to make sure it's not | ||
| 137 | * speculated to outside the seqlock critical section and | ||
| 138 | * does not cause time warps: | ||
| 139 | */ | ||
| 140 | rdtsc_barrier(); | ||
| 131 | now = vread(); | 141 | now = vread(); |
| 142 | rdtsc_barrier(); | ||
| 143 | |||
| 132 | base = __vsyscall_gtod_data.clock.cycle_last; | 144 | base = __vsyscall_gtod_data.clock.cycle_last; |
| 133 | mask = __vsyscall_gtod_data.clock.mask; | 145 | mask = __vsyscall_gtod_data.clock.mask; |
| 134 | mult = __vsyscall_gtod_data.clock.mult; | 146 | mult = __vsyscall_gtod_data.clock.mult; |
